/*
* scst_mem.c
*
* Copyright (C) 2006 - 2018 Vladislav Bolkhovitin <vst@vlnb.net>
* Copyright (C) 2007 - 2018 Western Digital Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2
* of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/unistd.h>
#include <linux/string.h>
#ifdef INSIDE_KERNEL_TREE
#include <scst/scst.h>
#else
#include "scst.h"
#endif
#include "scst_priv.h"
#include "scst_mem.h"
#define SGV_DEFAULT_PURGE_INTERVAL (60 * HZ)
#define SGV_MIN_SHRINK_INTERVAL (1 * HZ)
/* Max pages freed from a pool per shrinking iteration */
#define MAX_PAGES_PER_POOL 50
bool scst_force_global_sgv_pool;
static struct sgv_pool *sgv_dma_pool_per_cpu[NR_CPUS];
static struct sgv_pool *sgv_norm_clust_pool_per_cpu[NR_CPUS];
static struct sgv_pool *sgv_norm_pool_per_cpu[NR_CPUS];
static struct sgv_pool *sgv_dma_pool_global[NR_CPUS];
static struct sgv_pool *sgv_norm_clust_pool_global[NR_CPUS];
static struct sgv_pool *sgv_norm_pool_global[NR_CPUS];
static struct sgv_pool *sgv_norm_clust_pool_main, *sgv_norm_pool_main, *sgv_dma_pool_main;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_SCST_PROC)
static struct lock_class_key scst_pool_key;
static struct lockdep_map scst_pool_dep_map =
STATIC_LOCKDEP_MAP_INIT("scst_pool_kref", &scst_pool_key);
#endif
#endif
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
static atomic_t sgv_pages_total = ATOMIC_INIT(0);
#endif
/* Both read-only */
static int sgv_hi_wmk;
static int sgv_lo_wmk;
static int sgv_max_local_pages, sgv_max_trans_pages;
static DEFINE_SPINLOCK(sgv_pools_lock); /* inner lock for sgv_pool_lock! */
static DEFINE_MUTEX(sgv_pools_mutex);
static atomic_t sgv_releases_on_hiwmk = ATOMIC_INIT(0);
static atomic_t sgv_releases_on_hiwmk_failed = ATOMIC_INIT(0);
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
static atomic_t sgv_other_total_alloc = ATOMIC_INIT(0);
#endif
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
static struct shrinker *sgv_shrinker;
#else
static struct shrinker sgv_shrinker;
#endif
static struct kmem_cache *sgv_pool_cachep;
/*
* Protected by sgv_pools_mutex AND sgv_pools_lock for writes,
* either one for reads.
*/
static LIST_HEAD(sgv_pools_list);
static struct kobject *scst_sgv_kobj;
static int scst_sgv_sysfs_create(struct sgv_pool *pool);
static void scst_sgv_sysfs_del(struct sgv_pool *pool);
static inline bool sgv_pool_clustered(const struct sgv_pool *pool)
{
return pool->clustering_type != sgv_no_clustering;
}
void scst_sgv_pool_use_norm(struct scst_tgt_dev *tgt_dev)
{
tgt_dev->tgt_dev_gfp_mask = __GFP_NOWARN;
if (!scst_force_global_sgv_pool)
tgt_dev->pools = sgv_norm_pool_per_cpu;
else
tgt_dev->pools = sgv_norm_pool_global;
tgt_dev->tgt_dev_clust_pool = 0;
}
void scst_sgv_pool_use_norm_clust(struct scst_tgt_dev *tgt_dev)
{
TRACE_MEM("%s", "Use clustering");
tgt_dev->tgt_dev_gfp_mask = __GFP_NOWARN;
if (!scst_force_global_sgv_pool)
tgt_dev->pools = sgv_norm_clust_pool_per_cpu;
else
tgt_dev->pools = sgv_norm_clust_pool_global;
tgt_dev->tgt_dev_clust_pool = 1;
}
void scst_sgv_pool_use_dma(struct scst_tgt_dev *tgt_dev)
{
TRACE_MEM("%s", "Use ISA DMA memory");
tgt_dev->tgt_dev_gfp_mask = __GFP_NOWARN | GFP_DMA;
if (!scst_force_global_sgv_pool)
tgt_dev->pools = sgv_dma_pool_per_cpu;
else
tgt_dev->pools = sgv_dma_pool_global;
tgt_dev->tgt_dev_clust_pool = 0;
}
/* Must be called with no locks held */
static void sgv_dtor_and_free(struct sgv_pool_obj *obj)
{
struct sgv_pool *pool = obj->owner_pool;
TRACE_MEM("Destroying sgv obj %p", obj);
if (obj->sg_count != 0) {
pool->alloc_fns.free_pages_fn(obj->sg_entries,
obj->sg_count, obj->allocator_priv);
}
if (obj->sg_entries != obj->sg_entries_data) {
if (obj->trans_tbl !=
(struct trans_tbl_ent *)obj->sg_entries_data) {
/* kfree() handles NULL parameter */
kfree(obj->trans_tbl);
obj->trans_tbl = NULL;
}
kfree(obj->sg_entries);
}
kmem_cache_free(pool->caches[obj->cache_num], obj);
return;
}
/* Must be called with sgv_pool_lock held */
static void sgv_dec_cached_entries(struct sgv_pool *pool, int pages)
{
pool->cached_entries--;
pool->cached_pages -= pages;
}
/* Must be called with sgv_pool_lock held */
static void __sgv_purge_from_cache(struct sgv_pool_obj *obj)
{
int pages = obj->pages;
struct sgv_pool *pool = obj->owner_pool;
TRACE_MEM("Purging sgv obj %p from pool %p (new cached_entries %d)",
obj, pool, pool->cached_entries-1);
list_del(&obj->sorted_recycling_list_entry);
list_del(&obj->recycling_list_entry);
pool->inactive_cached_pages -= pages;
sgv_dec_cached_entries(pool, pages);
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
atomic_sub(pages, &sgv_pages_total);
#endif
return;
}
/* Must be called with sgv_pool_lock held */
static bool sgv_purge_from_cache(struct sgv_pool_obj *obj, int min_interval,
unsigned long cur_time)
{
EXTRACHECKS_BUG_ON(min_interval < 0);
TRACE_MEM("Checking if sgv obj %p should be purged (cur time %ld, "
"obj time %ld, time to purge %ld)", obj, cur_time,
obj->time_stamp, obj->time_stamp + min_interval);
if (time_after_eq(cur_time, (obj->time_stamp + min_interval))) {
__sgv_purge_from_cache(obj);
return true;
}
return false;
}
/* No locks */
static int sgv_shrink_pool(struct sgv_pool *pool, int nr, int min_interval,
unsigned long cur_time, int *out_freed)
{
int freed = 0;
TRACE_ENTRY();
TRACE_MEM("Trying to shrink pool %p (nr %d, min_interval %d)",
pool, nr, min_interval);
if (pool->purge_interval < 0) {
TRACE_MEM("Not shrinkable pool %p, skipping", pool);
goto out;
}
spin_lock_bh(&pool->sgv_pool_lock);
while (!list_empty(&pool->sorted_recycling_list) &&
#ifdef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
true) {
#else
(atomic_read(&sgv_pages_total) > sgv_lo_wmk)) {
#endif
struct sgv_pool_obj *obj = list_first_entry(
&pool->sorted_recycling_list,
struct sgv_pool_obj, sorted_recycling_list_entry);
if (sgv_purge_from_cache(obj, min_interval, cur_time)) {
int pages = obj->pages;
freed += pages;
nr -= pages;
TRACE_MEM("%d pages purged from pool %p (nr left %d, "
"total freed %d)", pages, pool, nr, freed);
spin_unlock_bh(&pool->sgv_pool_lock);
sgv_dtor_and_free(obj);
spin_lock_bh(&pool->sgv_pool_lock);
} else
break;
if ((nr <= 0) || (freed >= MAX_PAGES_PER_POOL)) {
if (freed >= MAX_PAGES_PER_POOL)
TRACE_MEM("%d pages purged from pool %p, "
"leaving", freed, pool);
break;
}
}
spin_unlock_bh(&pool->sgv_pool_lock);
out:
*out_freed += freed;
TRACE_EXIT_RES(nr);
return nr;
}
/* No locks */
static int __sgv_shrink(int nr, int min_interval, int *out_freed)
{
struct sgv_pool *pool;
unsigned long cur_time = jiffies;
int prev_nr = nr + 1;
TRACE_ENTRY();
TRACE_MEM("Trying to shrink %d pages from all sgv pools "
"(min_interval %d)", nr, min_interval);
while (prev_nr > nr && nr > 0) {
prev_nr = nr;
rcu_read_lock();
list_for_each_entry_rcu(pool, &sgv_pools_list,
sgv_pools_list_entry) {
if (pool->cached_entries)
nr = sgv_shrink_pool(pool, nr, min_interval,
cur_time, out_freed);
}
rcu_read_unlock();
}
TRACE_EXIT_RES(nr);
return nr;
}
static unsigned long __sgv_can_be_shrunk(void)
{
unsigned long res;
struct sgv_pool *pool;
int inactive_pages = 0;
TRACE_ENTRY();
spin_lock_bh(&sgv_pools_lock);
list_for_each_entry(pool, &sgv_pools_list, sgv_pools_list_entry) {
if (pool->purge_interval > 0)
inactive_pages += pool->inactive_cached_pages;
}
spin_unlock_bh(&sgv_pools_lock);
res = max(0, inactive_pages - sgv_lo_wmk);
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
TRACE_MEM("Can free %ld (total %d)", res, atomic_read(&sgv_pages_total));
#endif
TRACE_EXIT_RES(res);
return res;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
static unsigned long sgv_can_be_shrunk(struct shrinker *shrinker,
struct shrink_control *sc)
{
return __sgv_can_be_shrunk();
}
static unsigned long sgv_scan_shrink(struct shrinker *shrinker,
struct shrink_control *sc)
{
int freed = 0;
TRACE_ENTRY();
__sgv_shrink(sc->nr_to_scan, SGV_MIN_SHRINK_INTERVAL, &freed);
TRACE_MEM("Freed %d", freed);
TRACE_EXIT_RES(freed);
return freed;
}
#else /* if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) && (!defined(RHEL_MAJOR) || RHEL_MAJOR -0 < 6)
static int sgv_shrink(int nr, gfp_t gfpm)
#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
static int sgv_shrink(struct shrinker *shrinker, int nr, gfp_t gfpm)
#else
static int sgv_shrink(struct shrinker *shrinker, struct shrink_control *sc)
#endif
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
int nr = sc->nr_to_scan;
#endif
int freed = 0;
TRACE_ENTRY();
if (nr > 0) {
nr = __sgv_shrink(nr, SGV_MIN_SHRINK_INTERVAL, &freed);
TRACE_MEM("Left %d", nr);
} else
nr = __sgv_can_be_shrunk();
TRACE_EXIT_RES(nr);
return nr;
}
#endif /* if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
static void sgv_purge_work_fn(void *p)
#else
static void sgv_purge_work_fn(struct work_struct *work)
#endif
{
unsigned long cur_time = jiffies;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
struct sgv_pool *pool = (struct sgv_pool *)p;
#else
struct sgv_pool *pool = container_of(work, struct sgv_pool,
sgv_purge_work.work);
#endif
TRACE_ENTRY();
TRACE_MEM("Purge work for pool %p", pool);
spin_lock_bh(&pool->sgv_pool_lock);
pool->purge_work_scheduled = false;
while (!list_empty(&pool->sorted_recycling_list)) {
struct sgv_pool_obj *obj = list_first_entry(
&pool->sorted_recycling_list,
struct sgv_pool_obj, sorted_recycling_list_entry);
if (sgv_purge_from_cache(obj, pool->purge_interval, cur_time)) {
spin_unlock_bh(&pool->sgv_pool_lock);
sgv_dtor_and_free(obj);
spin_lock_bh(&pool->sgv_pool_lock);
} else {
/*
* Let's reschedule it for a full period so we don't get
* here too often. In the worst case the shrinker will
* reclaim buffers more quickly.
*/
TRACE_MEM("Rescheduling purge work for pool %p (delay "
"%d HZ/%d sec)", pool, pool->purge_interval,
pool->purge_interval/HZ);
schedule_delayed_work(&pool->sgv_purge_work,
pool->purge_interval);
pool->purge_work_scheduled = true;
break;
}
}
spin_unlock_bh(&pool->sgv_pool_lock);
TRACE_MEM("Leaving purge work for pool %p", pool);
TRACE_EXIT();
return;
}
static int sgv_check_full_clustering(struct scatterlist *sg, int cur, int hint)
{
int res = -1;
int i = hint;
unsigned long pfn_cur = page_to_pfn(sg_page(&sg[cur]));
int len_cur = sg[cur].length;
unsigned long pfn_cur_next = pfn_cur + (len_cur >> PAGE_SHIFT);
int full_page_cur = (len_cur & (PAGE_SIZE - 1)) == 0;
unsigned long pfn, pfn_next;
bool full_page;
#if 0
TRACE_MEM("pfn_cur %ld, pfn_cur_next %ld, len_cur %d, full_page_cur %d",
pfn_cur, pfn_cur_next, len_cur, full_page_cur);
#endif
/* check the hint first */
if (i >= 0) {
pfn = page_to_pfn(sg_page(&sg[i]));
pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
if ((pfn == pfn_cur_next) && full_page_cur)
goto out_head;
if ((pfn_next == pfn_cur) && full_page)
goto out_tail;
}
/* ToDo: implement more intelligent search */
for (i = cur - 1; i >= 0; i--) {
pfn = page_to_pfn(sg_page(&sg[i]));
pfn_next = pfn + (sg[i].length >> PAGE_SHIFT);
full_page = (sg[i].length & (PAGE_SIZE - 1)) == 0;
if ((pfn == pfn_cur_next) && full_page_cur)
goto out_head;
if ((pfn_next == pfn_cur) && full_page)
goto out_tail;
}
out:
return res;
out_tail:
TRACE_MEM("SG segment %d will be tail merged with segment %d", cur, i);
sg[i].length += len_cur;
sg_clear(&sg[cur]);
res = i;
goto out;
out_head:
TRACE_MEM("SG segment %d will be head merged with segment %d", cur, i);
sg_assign_page(&sg[i], sg_page(&sg[cur]));
sg[i].length += len_cur;
sg_clear(&sg[cur]);
res = i;
goto out;
}
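/*
* Worked example for the head merge above (a sketch assuming 4K pages;
* the PFNs are made up for illustration): if sg[cur] holds pfn 200 as one
* full page and some earlier sg[i] starts at pfn 201, then
* pfn_cur_next == 201 == pfn, so sg[cur] is prepended to sg[i]: sg[i] is
* repointed to the page at pfn 200 and grows by 4096, while sg[cur] is
* cleared.
*/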
static int sgv_check_tail_clustering(struct scatterlist *sg, int cur, int hint)
{
int res = -1;
unsigned long pfn_cur = page_to_pfn(sg_page(&sg[cur]));
int len_cur = sg[cur].length;
int prev;
unsigned long pfn_prev;
bool full_page;
#ifdef SCST_HIGHMEM
if (sg_page(&sg[cur]) >= highmem_start_page) {
TRACE_MEM("%s", "HIGHMEM page allocated, no clustering");
goto out;
}
#endif
#if 0
TRACE_MEM("pfn_cur %ld, len_cur %d", pfn_cur, len_cur);
#endif
if (cur == 0)
goto out;
prev = cur - 1;
pfn_prev = page_to_pfn(sg_page(&sg[prev])) +
(sg[prev].length >> PAGE_SHIFT);
full_page = (sg[prev].length & (PAGE_SIZE - 1)) == 0;
if ((pfn_prev == pfn_cur) && full_page) {
TRACE_MEM("SG segment %d will be tail merged with segment %d",
cur, prev);
sg[prev].length += len_cur;
sg_clear(&sg[cur]);
res = prev;
}
out:
return res;
}
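/*
* Worked example for the tail clustering above (a sketch assuming 4K
* pages; the PFNs are made up for illustration): suppose sg[0] holds
* pfn 100 with length 4096 and the page just placed into sg[1] is
* pfn 101. Then pfn_prev == 101 == pfn_cur and sg[0] is a full page,
* so the two entries are merged: sg[0].length becomes 8192, sg[1] is
* cleared and the caller does not advance sg_count.
*/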
static void sgv_free_sys_sg_entries(struct scatterlist *sg, int sg_count,
void *priv)
{
int i;
TRACE_MEM("sg=%p, sg_count=%d", sg, sg_count);
for (i = 0; i < sg_count; i++) {
struct page *p = sg_page(&sg[i]);
int len = sg[i].length;
int pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
TRACE_MEM("page %lx, len %d, pages %d",
(unsigned long)p, len, pages);
while (pages > 0) {
int order = 0;
TRACE_MEM("free_pages(): order %d, page %lx",
order, (unsigned long)p);
__free_pages(p, order);
pages -= 1 << order;
p += 1 << order;
}
}
}
static struct page *sgv_alloc_sys_pages(struct scatterlist *sg,
gfp_t gfp_mask, void *priv)
{
struct page *page = alloc_pages(gfp_mask, 0);
sg_set_page(sg, page, PAGE_SIZE, 0);
TRACE_MEM("page=%p, sg=%p, priv=%p", page, sg, priv);
if (page == NULL) {
TRACE(TRACE_OUT_OF_MEM, "%s", "Allocation of "
"sg page failed");
}
return page;
}
static int sgv_alloc_sg_entries(struct scatterlist *sg, int pages,
gfp_t gfp_mask, enum sgv_clustering_types clustering_type,
struct trans_tbl_ent *trans_tbl,
const struct sgv_pool_alloc_fns *alloc_fns, void *priv)
{
int sg_count = 0;
int pg, i, j;
int merged = -1;
TRACE_MEM("pages=%d, clustering_type=%d", pages, clustering_type);
#if 0
gfp_mask |= __GFP_COLD;
#endif
#ifdef CONFIG_SCST_STRICT_SECURITY
gfp_mask |= __GFP_ZERO;
#endif
for (pg = 0; pg < pages; pg++) {
void *rc;
#ifdef CONFIG_SCST_DEBUG_OOM
if (((gfp_mask & __GFP_NOFAIL) != __GFP_NOFAIL) &&
((scst_random() % 10000) == 55))
rc = NULL;
else
#endif
rc = alloc_fns->alloc_pages_fn(&sg[sg_count], gfp_mask,
priv);
if (rc == NULL)
goto out_no_mem;
/*
* This code allows the compiler to see the full bodies of the
* clustering functions and gives it a chance to generate better
* code. At least the resulting code is smaller, compared to
* calling them via a function pointer.
*/
if (clustering_type == sgv_full_clustering)
merged = sgv_check_full_clustering(sg, sg_count, merged);
else if (clustering_type == sgv_tail_clustering)
merged = sgv_check_tail_clustering(sg, sg_count, merged);
else
merged = -1;
if (merged == -1)
sg_count++;
TRACE_MEM("pg=%d, merged=%d, sg_count=%d", pg, merged,
sg_count);
}
if ((clustering_type != sgv_no_clustering) && (trans_tbl != NULL)) {
pg = 0;
for (i = 0; i < pages; i++) {
int n = PAGE_ALIGN(sg[i].length) >> PAGE_SHIFT;
trans_tbl[i].pg_count = pg;
for (j = 0; j < n; j++)
trans_tbl[pg++].sg_num = i+1;
TRACE_MEM("i=%d, n=%d, pg_count=%d", i, n,
trans_tbl[i].pg_count);
}
}
out:
TRACE_MEM("sg_count=%d", sg_count);
return sg_count;
out_no_mem:
alloc_fns->free_pages_fn(sg, sg_count, priv);
sg_count = 0;
goto out;
}
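/*
* Worked example for the trans_tbl fill-in above (a sketch assuming 4K
* pages): suppose pages == 3 and clustering merged the first two pages,
* so sg[0].length == 8192, sg[1].length == 4096 and sg_count == 2. The
* loop then produces:
*
*	trans_tbl[0] = { .pg_count = 0, .sg_num = 1 };
*	trans_tbl[1] = { .pg_count = 2, .sg_num = 1 };
*	trans_tbl[2] = { .pg_count = 3, .sg_num = 2 };
*
* I.e., indexed by page, sg_num is the 1-based SG entry covering that
* page; indexed by SG entry, pg_count is the first page of that entry.
* sgv_pool_alloc() relies on both when computing the entry count and
* restoring the length of the last entry.
*/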
static int sgv_alloc_arrays(struct sgv_pool_obj *obj,
int pages_to_alloc, gfp_t gfp_mask)
{
int sz, tsz = 0;
int res = 0;
TRACE_ENTRY();
sz = pages_to_alloc * sizeof(obj->sg_entries[0]);
obj->sg_entries = kmalloc(sz, gfp_mask);
if (unlikely(obj->sg_entries == NULL)) {
TRACE(TRACE_OUT_OF_MEM, "Allocation of sgv_pool_obj "
"SG vector failed (size %d)", sz);
res = -ENOMEM;
goto out;
}
sg_init_table(obj->sg_entries, pages_to_alloc);
if (sgv_pool_clustered(obj->owner_pool)) {
if (pages_to_alloc <= sgv_max_trans_pages) {
obj->trans_tbl =
(struct trans_tbl_ent *)obj->sg_entries_data;
/*
* No need to clear trans_tbl; if needed, it will be
* fully rewritten in sgv_alloc_sg_entries().
*/
} else {
tsz = pages_to_alloc * sizeof(obj->trans_tbl[0]);
obj->trans_tbl = kzalloc(tsz, gfp_mask);
if (unlikely(obj->trans_tbl == NULL)) {
TRACE(TRACE_OUT_OF_MEM, "Allocation of "
"trans_tbl failed (size %d)", tsz);
res = -ENOMEM;
goto out_free;
}
}
}
TRACE_MEM("pages_to_alloc %d, sz %d, tsz %d, obj %p, sg_entries %p, "
"trans_tbl %p", pages_to_alloc, sz, tsz, obj, obj->sg_entries,
obj->trans_tbl);
out:
TRACE_EXIT_RES(res);
return res;
out_free:
kfree(obj->sg_entries);
obj->sg_entries = NULL;
goto out;
}
static struct sgv_pool_obj *sgv_get_obj(struct sgv_pool *pool, int cache_num,
int pages, gfp_t gfp_mask, bool get_new)
{
struct sgv_pool_obj *obj;
spin_lock_bh(&pool->sgv_pool_lock);
if (unlikely(get_new)) {
/* Used only for buffer preallocation */
goto get_new;
}
if (likely(!list_empty(&pool->recycling_lists[cache_num]))) {
obj = list_first_entry(&pool->recycling_lists[cache_num],
struct sgv_pool_obj, recycling_list_entry);
list_del(&obj->sorted_recycling_list_entry);
list_del(&obj->recycling_list_entry);
pool->inactive_cached_pages -= pages;
spin_unlock_bh(&pool->sgv_pool_lock);
goto out;
}
get_new:
pool->cached_entries++;
pool->cached_pages += pages;
spin_unlock_bh(&pool->sgv_pool_lock);
TRACE_MEM("New cached entries %d (pool %p)", pool->cached_entries,
pool);
obj = kmem_cache_alloc(pool->caches[cache_num],
gfp_mask & ~(__GFP_HIGHMEM|GFP_DMA));
if (likely(obj)) {
memset(obj, 0, sizeof(*obj));
obj->cache_num = cache_num;
obj->pages = pages;
obj->owner_pool = pool;
} else {
spin_lock_bh(&pool->sgv_pool_lock);
sgv_dec_cached_entries(pool, pages);
spin_unlock_bh(&pool->sgv_pool_lock);
}
out:
return obj;
}
static void sgv_put_obj(struct sgv_pool_obj *obj)
{
struct sgv_pool *pool = obj->owner_pool;
struct list_head *entry;
struct list_head *list = &pool->recycling_lists[obj->cache_num];
int pages = obj->pages;
spin_lock_bh(&pool->sgv_pool_lock);
TRACE_MEM("sgv %p, cache num %d, pages %d, sg_count %d", obj,
obj->cache_num, pages, obj->sg_count);
if (sgv_pool_clustered(pool)) {
/* Prefer objects with fewer entries */
__list_for_each(entry, list) {
struct sgv_pool_obj *tmp = list_entry(entry,
struct sgv_pool_obj, recycling_list_entry);
TRACE_MEM("tmp %p, cache num %d, pages %d, sg_count %d",
tmp, tmp->cache_num, tmp->pages, tmp->sg_count);
if (obj->sg_count <= tmp->sg_count)
break;
}
entry = entry->prev;
} else
entry = list;
TRACE_MEM("Adding in %p (list %p)", entry, list);
list_add(&obj->recycling_list_entry, entry);
list_add_tail(&obj->sorted_recycling_list_entry,
&pool->sorted_recycling_list);
obj->time_stamp = jiffies;
pool->inactive_cached_pages += pages;
if (!pool->purge_work_scheduled) {
TRACE_MEM("Scheduling purge work for pool %p", pool);
pool->purge_work_scheduled = true;
schedule_delayed_work(&pool->sgv_purge_work,
pool->purge_interval);
}
spin_unlock_bh(&pool->sgv_pool_lock);
return;
}
/* No locks */
static int sgv_hiwmk_check(int pages_to_alloc)
{
int res = 0;
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
int pages = pages_to_alloc;
pages += atomic_read(&sgv_pages_total);
if (unlikely(pages > sgv_hi_wmk)) {
int freed = 0;
pages -= sgv_hi_wmk;
atomic_inc(&sgv_releases_on_hiwmk);
pages = __sgv_shrink(pages, 0, &freed);
if (pages > 0) {
TRACE(TRACE_OUT_OF_MEM, "Requested amount of "
"memory (%d pages) for being executed "
"commands together with the already "
"allocated memory exceeds the allowed "
"maximum %d. Should you increase "
"scst_max_cmd_mem?", pages_to_alloc,
sgv_hi_wmk);
atomic_inc(&sgv_releases_on_hiwmk_failed);
res = -ENOMEM;
goto out_unlock;
}
}
atomic_add(pages_to_alloc, &sgv_pages_total);
out_unlock:
TRACE_MEM("pages_to_alloc %d, new total %d", pages_to_alloc,
atomic_read(&sgv_pages_total));
#endif
return res;
}
/* No locks */
static void sgv_hiwmk_uncheck(int pages)
{
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
atomic_sub(pages, &sgv_pages_total);
TRACE_MEM("pages %d, new total %d", pages,
atomic_read(&sgv_pages_total));
#endif
return;
}
/* No locks */
static bool sgv_check_allowed_mem(struct scst_mem_lim *mem_lim, int pages)
{
int alloced;
bool res = true;
alloced = atomic_add_return(pages, &mem_lim->alloced_pages);
if (unlikely(alloced > mem_lim->max_allowed_pages)) {
TRACE(TRACE_OUT_OF_MEM, "Requested amount of memory "
"(%d pages) for being executed commands on a device "
"together with the already allocated memory exceeds "
"the allowed maximum %d. Should you increase "
"scst_max_dev_cmd_mem?", pages,
mem_lim->max_allowed_pages);
atomic_sub(pages, &mem_lim->alloced_pages);
res = false;
}
TRACE_MEM("mem_lim %p, pages %d, res %d, new alloced %d", mem_lim,
pages, res, atomic_read(&mem_lim->alloced_pages));
return res;
}
/* No locks */
static void sgv_uncheck_allowed_mem(struct scst_mem_lim *mem_lim, int pages)
{
atomic_sub(pages, &mem_lim->alloced_pages);
TRACE_MEM("mem_lim %p, pages %d, new alloced %d", mem_lim,
pages, atomic_read(&mem_lim->alloced_pages));
return;
}
/**
* sgv_pool_alloc - allocate an SG vector from the SGV pool
* @pool: the cache to allocate from
* @size: size of the resulting SG vector in bytes
* @gfp_mask: the allocation mask
* @flags: the allocation flags
* @count: the resulting count of SG entries in the resulting SG vector
* @sgv: the resulting SGV object
* @mem_lim: memory limits
* @priv: pointer to private data for this allocation
*
* Description:
* Allocates an SG vector from the SGV pool and returns a pointer to it,
* or NULL in case of any error. See the SGV pool documentation for more details.
*/
struct scatterlist *sgv_pool_alloc(struct sgv_pool *pool, unsigned int size,
gfp_t gfp_mask, int flags, int *count,
struct sgv_pool_obj **sgv, struct scst_mem_lim *mem_lim, void *priv)
{
struct sgv_pool_obj *obj;
int cache_num, pages, cnt;
struct scatterlist *res = NULL;
int pages_to_alloc;
int no_cached = flags & SGV_POOL_ALLOC_NO_CACHED;
bool allowed_mem_checked = false, hiwmk_checked = false;
TRACE_ENTRY();
if (unlikely(size == 0))
goto out;
EXTRACHECKS_BUG_ON((gfp_mask & __GFP_NOFAIL) == __GFP_NOFAIL);
pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
if (pool->single_alloc_pages == 0) {
int pages_order = get_order(size);
cache_num = pages_order;
pages_to_alloc = (1 << pages_order);
} else {
cache_num = 0;
pages_to_alloc = max(pool->single_alloc_pages, pages);
}
TRACE_MEM("size=%d, pages=%d, pages_to_alloc=%d, cache num=%d, "
"flags=%x, no_cached=%d, *sgv=%p", size, pages,
pages_to_alloc, cache_num, flags, no_cached, *sgv);
if (*sgv != NULL) {
obj = *sgv;
TRACE_MEM("Supplied obj %p, cache num %d", obj, obj->cache_num);
EXTRACHECKS_BUG_ON(obj->sg_count != 0);
if (unlikely(!sgv_check_allowed_mem(mem_lim, pages_to_alloc)))
goto out_fail_free_sg_entries;
allowed_mem_checked = true;
if (unlikely(sgv_hiwmk_check(pages_to_alloc) != 0))
goto out_fail_free_sg_entries;
hiwmk_checked = true;
} else if ((pages_to_alloc <= pool->max_cached_pages) && !no_cached) {
if (unlikely(!sgv_check_allowed_mem(mem_lim, pages_to_alloc)))
goto out_fail;
allowed_mem_checked = true;
obj = sgv_get_obj(pool, cache_num, pages_to_alloc, gfp_mask,
flags & SGV_POOL_ALLOC_GET_NEW);
if (unlikely(obj == NULL)) {
TRACE(TRACE_OUT_OF_MEM, "Allocation of "
"sgv_pool_obj failed (size %d)", size);
goto out_fail;
}
if (obj->sg_count != 0) {
TRACE_MEM("Cached obj %p", obj);
atomic_inc(&pool->cache_acc[cache_num].hit_alloc);
goto success;
}
if (flags & SGV_POOL_NO_ALLOC_ON_CACHE_MISS) {
if (!(flags & SGV_POOL_RETURN_OBJ_ON_ALLOC_FAIL))
goto out_fail_free;
}
if (likely(!obj->recycling_list_entry.next)) {
TRACE_MEM("Brand new obj %p", obj);
} else if (unlikely(obj->sg_entries != obj->sg_entries_data)) {
TRACE_MEM("Cached obj %p with sg_count == 0", obj);
kfree(obj->sg_entries);
obj->sg_entries = NULL;
}
if (pages_to_alloc <= sgv_max_local_pages) {
obj->sg_entries = obj->sg_entries_data;
sg_init_table(obj->sg_entries, pages_to_alloc);
TRACE_MEM("sg_entries %p", obj->sg_entries);
if (sgv_pool_clustered(pool)) {
obj->trans_tbl = (struct trans_tbl_ent *)
(obj->sg_entries + pages_to_alloc);
TRACE_MEM("trans_tbl %p", obj->trans_tbl);
/*
* No need to clear trans_tbl; if needed, it
* will be fully rewritten in
* sgv_alloc_sg_entries().
*/
}
} else {
if (unlikely(sgv_alloc_arrays(obj, pages_to_alloc,
gfp_mask) != 0))
goto out_fail_free;
}
if ((flags & SGV_POOL_NO_ALLOC_ON_CACHE_MISS) &&
(flags & SGV_POOL_RETURN_OBJ_ON_ALLOC_FAIL))
goto out_return;
obj->allocator_priv = priv;
if (unlikely(sgv_hiwmk_check(pages_to_alloc) != 0))
goto out_fail_free_sg_entries;
hiwmk_checked = true;
} else {
int sz;
pages_to_alloc = pages;
if (unlikely(!sgv_check_allowed_mem(mem_lim, pages_to_alloc)))
goto out_fail;
allowed_mem_checked = true;
if (flags & SGV_POOL_NO_ALLOC_ON_CACHE_MISS)
goto out_return2;
sz = sizeof(*obj) + pages * sizeof(obj->sg_entries[0]);
obj = kmalloc(sz, gfp_mask);
if (unlikely(obj == NULL)) {
TRACE(TRACE_OUT_OF_MEM, "Allocation of "
"sgv_pool_obj failed (size %d)", size);
goto out_fail;
}
memset(obj, 0, sizeof(*obj));
obj->owner_pool = pool;
cache_num = -1;
obj->cache_num = cache_num;
obj->pages = pages_to_alloc;
obj->allocator_priv = priv;
obj->sg_entries = obj->sg_entries_data;
sg_init_table(obj->sg_entries, pages);
if (unlikely(sgv_hiwmk_check(pages_to_alloc) != 0))
goto out_fail_free_sg_entries;
hiwmk_checked = true;
TRACE_MEM("Big or no_cached obj %p (size %d)", obj, sz);
}
obj->sg_count = sgv_alloc_sg_entries(obj->sg_entries,
pages_to_alloc, gfp_mask, pool->clustering_type,
obj->trans_tbl, &pool->alloc_fns, priv);
if (unlikely(obj->sg_count <= 0)) {
obj->sg_count = 0;
if ((flags & SGV_POOL_RETURN_OBJ_ON_ALLOC_FAIL) &&
(cache_num >= 0))
goto out_return1;
else
goto out_fail_free_sg_entries;
}
if (cache_num >= 0) {
atomic_add(pages_to_alloc - obj->sg_count,
&pool->cache_acc[cache_num].merged);
} else {
if (no_cached) {
atomic_add(pages_to_alloc,
&pool->other_pages);
atomic_add(pages_to_alloc - obj->sg_count,
&pool->other_merged);
} else {
atomic_add(pages_to_alloc,
&pool->big_pages);
atomic_add(pages_to_alloc - obj->sg_count,
&pool->big_merged);
}
}
success:
if (cache_num >= 0) {
int sg;
atomic_inc(&pool->cache_acc[cache_num].total_alloc);
if (sgv_pool_clustered(pool))
cnt = obj->trans_tbl[pages-1].sg_num;
else
cnt = pages;
sg = cnt-1;
obj->orig_sg = sg;
obj->orig_length = obj->sg_entries[sg].length;
if (sgv_pool_clustered(pool)) {
obj->sg_entries[sg].length =
(pages - obj->trans_tbl[sg].pg_count) << PAGE_SHIFT;
}
} else {
cnt = obj->sg_count;
if (no_cached)
atomic_inc(&pool->other_alloc);
else
atomic_inc(&pool->big_alloc);
}
*count = cnt;
res = obj->sg_entries;
*sgv = obj;
obj->sg_entries[cnt-1].length -= PAGE_ALIGN(size) - size;
sg_mark_end(&obj->sg_entries[cnt-1]);
TRACE_MEM("obj=%p, sg_entries %p (size=%d, pages=%d, sg_count=%d, "
"count=%d, last_len=%d)", obj, obj->sg_entries, size, pages,
obj->sg_count, *count, obj->sg_entries[obj->orig_sg].length);
out:
TRACE_EXIT_HRES(res);
return res;
out_return:
obj->allocator_priv = priv;
obj->owner_pool = pool;
out_return1:
*sgv = obj;
TRACE_MEM("Returning failed obj %p", obj);
out_return2:
*count = pages_to_alloc;
res = NULL;
goto out_uncheck;
out_fail_free_sg_entries:
if (obj->sg_entries != obj->sg_entries_data) {
if (obj->trans_tbl !=
(struct trans_tbl_ent *)obj->sg_entries_data) {
/* kfree() handles NULL parameter */
kfree(obj->trans_tbl);
obj->trans_tbl = NULL;
}
kfree(obj->sg_entries);
obj->sg_entries = NULL;
}
out_fail_free:
if (cache_num >= 0) {
spin_lock_bh(&pool->sgv_pool_lock);
sgv_dec_cached_entries(pool, pages_to_alloc);
spin_unlock_bh(&pool->sgv_pool_lock);
kmem_cache_free(pool->caches[obj->cache_num], obj);
} else
kfree(obj);
out_fail:
res = NULL;
*count = 0;
*sgv = NULL;
TRACE_MEM("%s", "Allocation failed");
out_uncheck:
if (hiwmk_checked)
sgv_hiwmk_uncheck(pages_to_alloc);
if (allowed_mem_checked)
sgv_uncheck_allowed_mem(mem_lim, pages_to_alloc);
goto out;
}
EXPORT_SYMBOL_GPL(sgv_pool_alloc);
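/*
* Usage sketch (illustrative only; the pool pointer, the 64K size and the
* GFP flags are assumptions of the example, scst_init_mem_lim() from
* scst.h is assumed to initialize the limit, and error handling is
* minimal):
*
*	struct scst_mem_lim mem_lim;
*	struct sgv_pool_obj *sgv = NULL;
*	struct scatterlist *sg;
*	int count;
*
*	scst_init_mem_lim(&mem_lim);
*	sg = sgv_pool_alloc(pool, 64 * 1024, GFP_KERNEL | __GFP_NOWARN, 0,
*		&count, &sgv, &mem_lim, NULL);
*	if (sg == NULL)
*		return -ENOMEM;
*	... use sg[0] .. sg[count - 1] ...
*	sgv_pool_free(sgv, &mem_lim);
*
* On success *sgv must be passed unchanged to sgv_pool_free() together
* with the same mem_lim.
*/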
/*
* sgv_get_priv - return the private allocation data
*
* Returns the allocation private data for this SGV
* cache object. The private data is supposed to be set by sgv_pool_alloc().
*/
void *sgv_get_priv(struct sgv_pool_obj *obj)
{
return obj->allocator_priv;
}
EXPORT_SYMBOL_GPL(sgv_get_priv);
/**
* sgv_pool_free - free previously allocated SG vector
* @obj: the SGV object to free
* @mem_lim: memory limits
*
* Description:
* Frees a previously allocated SG vector and updates the memory limits.
*/
void sgv_pool_free(struct sgv_pool_obj *obj, struct scst_mem_lim *mem_lim)
{
int pages = (obj->sg_count != 0) ? obj->pages : 0;
TRACE_MEM("Freeing obj %p, cache num %d, pages %d, sg_entries %p, "
"sg_count %d, allocator_priv %p", obj, obj->cache_num, pages,
obj->sg_entries, obj->sg_count, obj->allocator_priv);
/*
* Enable it if you are investigating a data corruption and want to make
* sure that the target or dev handler didn't leave the pages mapped
* somewhere and, hence, provoked the corruption.
*
* Make sure the check value for _count is set correctly. In most cases 1 is
* correct, but, e.g., iSCSI-SCST can call it with value 2, because
* it frees the corresponding cmd before the last put_page() call from
* net_put_page() for the last page in the SG. Also, user space dev handlers
* usually have their memory mapped into their address space.
*/
#if 0
{
struct scatterlist *sg = obj->sg_entries;
int i;
for (i = 0; i < obj->sg_count; i++) {
struct page *p = sg_page(&sg[i]);
int len = sg[i].length;
int pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
while (pages > 0) {
if (page_count(p) != 1) {
PRINT_WARNING("Freeing page %p with "
"additional owners (_count %d). "
"Data corruption possible!",
p, page_count(p));
WARN_ON(1);
}
pages--;
p++;
}
}
}
#endif
if (obj->cache_num >= 0) {
obj->sg_entries[obj->orig_sg].length = obj->orig_length;
sg_unmark_end(&obj->sg_entries[obj->orig_sg]);
sgv_put_obj(obj);
} else {
obj->owner_pool->alloc_fns.free_pages_fn(obj->sg_entries,
obj->sg_count, obj->allocator_priv);
kfree(obj);
sgv_hiwmk_uncheck(pages);
}
sgv_uncheck_allowed_mem(mem_lim, pages);
return;
}
EXPORT_SYMBOL_GPL(sgv_pool_free);
/*
* scst_alloc_sg() - allocates an SG vector
*
* Allocates and returns a pointer to an SG vector with data size "size".
* The count of entries in the vector is returned in *count.
* Returns NULL on failure.
*
* Please don't use it for massive commands' data buffers, because it
* isn't fair and doesn't account for per-device memory limits. Use
* sgv_pool_alloc() instead.
*/
struct scatterlist *scst_alloc_sg(int size, gfp_t gfp_mask, int *count)
{
struct scatterlist *res;
int pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct sgv_pool_alloc_fns sys_alloc_fns = {
.alloc_pages_fn = sgv_alloc_sys_pages,
.free_pages_fn = sgv_free_sys_sg_entries,
};
int no_fail = ((gfp_mask & __GFP_NOFAIL) == __GFP_NOFAIL);
int cnt;
TRACE_ENTRY();
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
atomic_inc(&sgv_other_total_alloc);
#endif
if (unlikely(sgv_hiwmk_check(pages) != 0)) {
if (!no_fail) {
res = NULL;
goto out;
} else {
/*
* Update sgv_pages_total anyway, since this alloc can't
* fail. If it wasn't updated, the counter would
* cross 0 on free again.
*/
sgv_hiwmk_uncheck(-pages);
}
}
res = kmalloc_array(pages, sizeof(*res), gfp_mask);
if (res == NULL) {
TRACE(TRACE_OUT_OF_MEM, "Unable to allocate sg for %d pages",
pages);
goto out_uncheck;
}
sg_init_table(res, pages);
/*
* If we allowed clustering here, we would have trouble in
* scst_free_sg() figuring out how many pages are in the SG vector.
* So, never use clustering here.
*/
cnt = sgv_alloc_sg_entries(res, pages, gfp_mask, sgv_no_clustering,
NULL, &sys_alloc_fns, NULL);
if (cnt <= 0)
goto out_free;
res[cnt-1].length -= PAGE_ALIGN(size) - size;
*count = cnt;
out:
TRACE_MEM("Alloced sg %p (count %d, no_fail %d)", res, *count, no_fail);
TRACE_EXIT_HRES(res);
return res;
out_free:
kfree(res);
res = NULL;
out_uncheck:
if (!no_fail)
sgv_hiwmk_uncheck(pages);
goto out;
}
EXPORT_SYMBOL_GPL(scst_alloc_sg);
/*
* scst_free_sg() - frees SG vector
*
* Frees SG vector returned by scst_alloc_sg().
*/
void scst_free_sg(struct scatterlist *sg, int count)
{
TRACE_MEM("Freeing sg=%p", sg);
sgv_hiwmk_uncheck(count);
sgv_free_sys_sg_entries(sg, count, NULL);
kfree(sg);
return;
}
EXPORT_SYMBOL_GPL(scst_free_sg);
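/*
* Usage sketch for the scst_alloc_sg()/scst_free_sg() pair (illustrative;
* the 12K size is an assumption of the example):
*
*	int cnt;
*	struct scatterlist *sg = scst_alloc_sg(12 * 1024, GFP_KERNEL, &cnt);
*
*	if (sg == NULL)
*		return -ENOMEM;
*	... use the cnt entries ...
*	scst_free_sg(sg, cnt);
*
* Since this path never clusters, cnt equals the number of pages, which
* is what scst_free_sg() feeds back into sgv_hiwmk_uncheck().
*/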
/* Must be called under sgv_pools_mutex */
static void sgv_pool_init_cache(struct sgv_pool *pool, int cache_num,
bool per_cpu)
{
int size;
int pages;
struct sgv_pool_obj *obj;
atomic_set(&pool->cache_acc[cache_num].total_alloc, 0);
atomic_set(&pool->cache_acc[cache_num].hit_alloc, 0);
atomic_set(&pool->cache_acc[cache_num].merged, 0);
if (pool->single_alloc_pages == 0)
pages = 1 << cache_num;
else
pages = pool->single_alloc_pages;
if (pages <= sgv_max_local_pages) {
size = sizeof(*obj) + pages *
(sizeof(obj->sg_entries[0]) +
((pool->clustering_type != sgv_no_clustering) ?
sizeof(obj->trans_tbl[0]) : 0));
} else if (pages <= sgv_max_trans_pages) {
/*
* sg_entries is allocated outside object,
* but trans_tbl is still embedded.
*/
size = sizeof(*obj) + pages *
(((pool->clustering_type != sgv_no_clustering) ?
sizeof(obj->trans_tbl[0]) : 0));
} else {
size = sizeof(*obj);
/* both sg_entries and trans_tbl are kmalloc()'ed */
}
TRACE_MEM("pages=%d, size=%d (per cpu %d)", pages, size, per_cpu);
scnprintf(pool->cache_names[cache_num],
sizeof(pool->cache_names[cache_num]),
"%s-%uK", pool->name, (pages << PAGE_SHIFT) >> 10);
pool->caches[cache_num] = kmem_cache_create(
pool->cache_names[cache_num], size,
0, per_cpu ? SCST_SLAB_FLAGS :
(SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN), NULL
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
, NULL);
#else
);
#endif
return;
}
/* Must be called under sgv_pools_mutex */
static int sgv_pool_init(struct sgv_pool *pool, const char *name,
enum sgv_clustering_types clustering_type, int single_alloc_pages,
int purge_interval, bool per_cpu)
{
int res = -ENOMEM;
int i;
TRACE_ENTRY();
if (single_alloc_pages < 0) {
PRINT_ERROR("Wrong single_alloc_pages value %d",
single_alloc_pages);
res = -EINVAL;
goto out;
}
memset(pool, 0, sizeof(*pool));
atomic_set(&pool->big_alloc, 0);
atomic_set(&pool->big_pages, 0);
atomic_set(&pool->big_merged, 0);
atomic_set(&pool->other_alloc, 0);
atomic_set(&pool->other_pages, 0);
atomic_set(&pool->other_merged, 0);
pool->clustering_type = clustering_type;
pool->single_alloc_pages = single_alloc_pages;
if (purge_interval != 0) {
pool->purge_interval = purge_interval;
if (purge_interval < 0) {
/* Let's pretend that it's always scheduled */
pool->purge_work_scheduled = 1;
}
} else
pool->purge_interval = SGV_DEFAULT_PURGE_INTERVAL;
if (single_alloc_pages == 0) {
pool->max_caches = SGV_POOL_ELEMENTS;
pool->max_cached_pages = 1 << (SGV_POOL_ELEMENTS - 1);
} else {
pool->max_caches = 1;
pool->max_cached_pages = single_alloc_pages;
}
pool->alloc_fns.alloc_pages_fn = sgv_alloc_sys_pages;
pool->alloc_fns.free_pages_fn = sgv_free_sys_sg_entries;
TRACE_MEM("name %s, sizeof(*obj)=%zd, clustering_type=%d, "
"single_alloc_pages=%d, max_caches=%d, max_cached_pages=%d",
name, sizeof(struct sgv_pool_obj), clustering_type,
single_alloc_pages, pool->max_caches, pool->max_cached_pages);
strlcpy(pool->name, name, sizeof(pool->name));
pool->owner_mm = current->mm;
for (i = 0; i < pool->max_caches; i++) {
sgv_pool_init_cache(pool, i, per_cpu);
if (pool->caches[i] == NULL) {
PRINT_ERROR("Allocation of sgv_pool "
"cache %s(%d) failed", name, i);
goto out_free;
}
}
atomic_set(&pool->sgv_pool_ref, 1);
spin_lock_init(&pool->sgv_pool_lock);
INIT_LIST_HEAD(&pool->sorted_recycling_list);
for (i = 0; i < pool->max_caches; i++)
INIT_LIST_HEAD(&pool->recycling_lists[i]);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20))
INIT_DELAYED_WORK(&pool->sgv_purge_work, sgv_purge_work_fn);
#else
INIT_WORK(&pool->sgv_purge_work, sgv_purge_work_fn, pool);
#endif
spin_lock_bh(&sgv_pools_lock);
list_add_tail(&pool->sgv_pools_list_entry, &sgv_pools_list);
spin_unlock_bh(&sgv_pools_lock);
res = scst_sgv_sysfs_create(pool);
if (res != 0)
goto out_del;
res = 0;
out:
TRACE_EXIT_RES(res);
return res;
out_del:
spin_lock_bh(&sgv_pools_lock);
list_del(&pool->sgv_pools_list_entry);
spin_unlock_bh(&sgv_pools_lock);
synchronize_rcu();
out_free:
for (i = 0; i < pool->max_caches; i++) {
if (pool->caches[i]) {
kmem_cache_destroy(pool->caches[i]);
pool->caches[i] = NULL;
} else
break;
}
goto out;
}
static void sgv_evaluate_local_max_pages(void)
{
int space4sgv_ttbl = PAGE_SIZE - sizeof(struct sgv_pool_obj);
sgv_max_local_pages = space4sgv_ttbl /
(sizeof(struct trans_tbl_ent) + sizeof(struct scatterlist));
sgv_max_trans_pages = space4sgv_ttbl / sizeof(struct trans_tbl_ent);
TRACE_MEM("sgv_max_local_pages %d, sgv_max_trans_pages %d",
sgv_max_local_pages, sgv_max_trans_pages);
return;
}
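/*
* Worked example with illustrative sizes only (the actual numbers depend
* on the architecture and config): with a 4096-byte page,
* sizeof(struct sgv_pool_obj) == 96, sizeof(struct scatterlist) == 32 and
* sizeof(struct trans_tbl_ent) == 4, space4sgv_ttbl == 4000, so
* sgv_max_local_pages == 4000 / 36 == 111 (both arrays fit into the
* object's page) and sgv_max_trans_pages == 4000 / 4 == 1000 (only
* trans_tbl still fits).
*/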
/*
* sgv_pool_flush() - flushes the SGV pool.
*
* Flushes, i.e. frees, all the cached entries in the SGV pool.
*/
void sgv_pool_flush(struct sgv_pool *pool)
{
int i;
TRACE_ENTRY();
for (i = 0; i < pool->max_caches; i++) {
struct sgv_pool_obj *obj;
spin_lock_bh(&pool->sgv_pool_lock);
while (!list_empty(&pool->recycling_lists[i])) {
obj = list_first_entry(&pool->recycling_lists[i],
struct sgv_pool_obj, recycling_list_entry);
__sgv_purge_from_cache(obj);
spin_unlock_bh(&pool->sgv_pool_lock);
EXTRACHECKS_BUG_ON(obj->owner_pool != pool);
sgv_dtor_and_free(obj);
spin_lock_bh(&pool->sgv_pool_lock);
}
spin_unlock_bh(&pool->sgv_pool_lock);
}
TRACE_EXIT();
return;
}
EXPORT_SYMBOL_GPL(sgv_pool_flush);
static void sgv_pool_destroy(struct sgv_pool *pool)
{
int i;
TRACE_ENTRY();
sgv_pool_flush(pool);
mutex_lock(&sgv_pools_mutex);
spin_lock_bh(&sgv_pools_lock);
list_del(&pool->sgv_pools_list_entry);
spin_unlock_bh(&sgv_pools_lock);
mutex_unlock(&sgv_pools_mutex);
synchronize_rcu();
scst_sgv_sysfs_del(pool);
cancel_delayed_work_sync(&pool->sgv_purge_work);
for (i = 0; i < pool->max_caches; i++) {
if (pool->caches[i])
kmem_cache_destroy(pool->caches[i]);
pool->caches[i] = NULL;
}
kmem_cache_free(sgv_pool_cachep, pool);
TRACE_EXIT();
return;
}
/**
* sgv_pool_set_allocator - set a custom pages allocator
* @pool: the cache
* @alloc_pages_fn: pages allocation function
* @free_pages_fn: pages freeing function
*
* Description:
* Sets a custom pages allocator for the SGV pool.
* See the SGV pool documentation for more details.
*/
void sgv_pool_set_allocator(struct sgv_pool *pool,
struct page *(*alloc_pages_fn)(struct scatterlist *, gfp_t, void *),
void (*free_pages_fn)(struct scatterlist *, int, void *))
{
pool->alloc_fns.alloc_pages_fn = alloc_pages_fn;
pool->alloc_fns.free_pages_fn = free_pages_fn;
return;
}
EXPORT_SYMBOL_GPL(sgv_pool_set_allocator);
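/*
* Sketch of a custom allocator pair (illustrative; my_alloc_pages() and
* my_free_pages() are hypothetical and assume a pool created with
* sgv_no_clustering, so every SG entry covers exactly one page). The
* callbacks must follow the same contract as sgv_alloc_sys_pages() and
* sgv_free_sys_sg_entries() above: the alloc function fills one SG entry
* and returns the page (NULL on failure), the free function releases all
* sg_count entries.
*
*	static struct page *my_alloc_pages(struct scatterlist *sg,
*		gfp_t gfp_mask, void *priv)
*	{
*		struct page *page = alloc_pages(gfp_mask, 0);
*
*		if (page != NULL)
*			sg_set_page(sg, page, PAGE_SIZE, 0);
*		return page;
*	}
*
*	static void my_free_pages(struct scatterlist *sg, int sg_count,
*		void *priv)
*	{
*		int i;
*
*		for (i = 0; i < sg_count; i++)
*			__free_pages(sg_page(&sg[i]), 0);
*	}
*
*	sgv_pool_set_allocator(pool, my_alloc_pages, my_free_pages);
*/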
/**
* sgv_pool_create_node - creates and initializes an SGV pool
* @name: the name of the SGV pool
* @clustering_type: sets type of the pages clustering.
* @single_alloc_pages: if 0, then the SGV pool will work in the set of
* power 2 size buffers mode. If >0, then the SGV pool will
* work in the fixed size buffers mode. In this case
* single_alloc_pages sets the size of each buffer in pages.
* @shared: sets if the SGV pool can be shared between devices or not.
* Cache sharing is allowed only between devices created inside
* the same address space. If an SGV pool is shared, each
* subsequent call of sgv_pool_create*() with the same cache name
* will not create a new cache, but instead return a reference
* to it.
* @purge_interval: sets the cache purging interval, in HZ. I.e., an SG
* buffer will be freed if it has been unused for a time t, where
* purge_interval <= t < 2*purge_interval. If purge_interval is 0,
* the default interval (60 seconds) will be used. If purge_interval
* is < 0, automatic purging will be disabled.
* @nodeid: NUMA node for this pool. Can be NUMA_NO_NODE, if the
* caller doesn't care.
*
* Description:
* Returns the resulting SGV pool or NULL in case of any error.
*/
struct sgv_pool *sgv_pool_create_node(const char *name,
enum sgv_clustering_types clustering_type,
int single_alloc_pages, bool shared, int purge_interval, int nodeid)
{
struct sgv_pool *pool, *tp;
int rc;
TRACE_ENTRY();
TRACE_MEM("Creating pool %s (clustering_type %d, "
"single_alloc_pages %d, shared %d, purge_interval %d, "
"nodeid %d)", name, clustering_type, single_alloc_pages,
shared, purge_interval, nodeid);
/*
* __sgv_shrink() takes sgv_pools_mutex, so we have to play tricks to
* prevent a deadlock with it if this allocation tries to reclaim
* memory.
*/
pool = kmem_cache_alloc_node(sgv_pool_cachep, GFP_KERNEL, nodeid);
if (pool == NULL) {
PRINT_ERROR("Allocation of sgv_pool failed (size %zd)",
sizeof(*pool));
goto out;
}
memset(pool, 0, sizeof(*pool));
mutex_lock(&sgv_pools_mutex);
list_for_each_entry(tp, &sgv_pools_list, sgv_pools_list_entry) {
if (strcmp(tp->name, name) == 0) {
if (shared) {
if (tp->owner_mm != current->mm) {
PRINT_ERROR("Attempt of a shared use "
"of SGV pool %s with "
"different MM", name);
goto out_free;
}
sgv_pool_get(tp);
goto out_free;
} else {
PRINT_ERROR("SGV pool %s already exists", name);
tp = NULL;
goto out_free;
}
}
}
tp = NULL;
rc = sgv_pool_init(pool, name, clustering_type, single_alloc_pages,
purge_interval, nodeid != NUMA_NO_NODE);
if (rc != 0)
goto out_free;
out_unlock:
mutex_unlock(&sgv_pools_mutex);
out:
TRACE_EXIT_RES(pool != NULL);
return pool;
out_free:
kmem_cache_free(sgv_pool_cachep, pool);
pool = tp;
goto out_unlock;
}
EXPORT_SYMBOL_GPL(sgv_pool_create_node);
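/*
* Creation sketch (illustrative; the name and parameters are assumptions
* of the example): a dev handler creating its own shared, tail-clustered
* pool with the default purge interval and no NUMA preference:
*
*	struct sgv_pool *p;
*
*	p = sgv_pool_create_node("my-handler", sgv_tail_clustering, 0,
*		true, 0, NUMA_NO_NODE);
*	if (p == NULL)
*		return -ENOMEM;
*	... allocate from p via sgv_pool_alloc() ...
*	sgv_pool_del(p);
*/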
/*
* sgv_pool_get - increase ref counter for the corresponding SGV pool
*
* Increases ref counter for the corresponding SGV pool
*/
void sgv_pool_get(struct sgv_pool *pool)
{
atomic_inc(&pool->sgv_pool_ref);
TRACE_MEM("Incrementing sgv pool %p ref (new value %d)",
pool, atomic_read(&pool->sgv_pool_ref));
return;
}
EXPORT_SYMBOL_GPL(sgv_pool_get);
/*
* sgv_pool_put - decrease ref counter for the corresponding SGV pool
*
* Decreases ref counter for the corresponding SGV pool. If the ref
* counter reaches 0, the cache will be destroyed.
*/
void sgv_pool_put(struct sgv_pool *pool)
{
TRACE_MEM("Decrementing sgv pool %p ref (new value %d)",
pool, atomic_read(&pool->sgv_pool_ref)-1);
if (atomic_dec_and_test(&pool->sgv_pool_ref))
sgv_pool_destroy(pool);
return;
}
EXPORT_SYMBOL_GPL(sgv_pool_put);
/**
* sgv_pool_del - deletes the corresponding SGV pool
* @pool: the cache to delete.
*
* Description:
* If the cache is shared, it will decrease its reference counter.
* If the reference counter reaches 0, the cache will be destroyed.
*/
void sgv_pool_del(struct sgv_pool *pool)
{
TRACE_ENTRY();
sgv_pool_put(pool);
TRACE_EXIT();
return;
}
EXPORT_SYMBOL_GPL(sgv_pool_del);
/* Both parameters in pages */
int scst_sgv_pools_init(unsigned long mem_hwmark, unsigned long mem_lwmark)
{
int res = 0, i;
TRACE_ENTRY();
sgv_pool_cachep = KMEM_CACHE(sgv_pool, SCST_SLAB_FLAGS|SLAB_HWCACHE_ALIGN);
if (sgv_pool_cachep == NULL)
goto out_err;
sgv_hi_wmk = mem_hwmark;
sgv_lo_wmk = mem_lwmark;
sgv_evaluate_local_max_pages();
sgv_norm_pool_main = sgv_pool_create("sgv", sgv_no_clustering, 0, false, 0);
if (sgv_norm_pool_main == NULL)
goto out_free_pool;
sgv_norm_clust_pool_main = sgv_pool_create("sgv-clust",
sgv_full_clustering, 0, false, 0);
if (sgv_norm_clust_pool_main == NULL)
goto out_free_norm;
sgv_dma_pool_main = sgv_pool_create("sgv-dma", sgv_no_clustering, 0,
false, 0);
if (sgv_dma_pool_main == NULL)
goto out_free_clust;
/*
* ToDo: not compatible with CPU hotplug! Notification
* callbacks must be installed!
*/
for (i = 0; i < nr_cpu_ids; i++)
sgv_norm_pool_global[i] = sgv_norm_pool_main;
for (i = 0; i < nr_cpu_ids; i++)
sgv_norm_clust_pool_global[i] = sgv_norm_clust_pool_main;
for (i = 0; i < nr_cpu_ids; i++)
sgv_dma_pool_global[i] = sgv_dma_pool_main;
for (i = 0; i < nr_cpu_ids; i++) {
char name[60];
if (!cpu_online(i))
continue;
scnprintf(name, sizeof(name), "sgv-%d", i);
sgv_norm_pool_per_cpu[i] = sgv_pool_create_node(name,
sgv_no_clustering, 0, false, 0, cpu_to_node(i));
if (sgv_norm_pool_per_cpu[i] == NULL)
goto out_free_per_cpu_norm;
}
for (i = 0; i < nr_cpu_ids; i++) {
char name[60];
if (!cpu_online(i))
continue;
scnprintf(name, sizeof(name), "sgv-clust-%d", i);
sgv_norm_clust_pool_per_cpu[i] = sgv_pool_create_node(name,
sgv_full_clustering, 0, false, 0, cpu_to_node(i));
if (sgv_norm_clust_pool_per_cpu[i] == NULL)
goto out_free_per_cpu_clust;
}
for (i = 0; i < nr_cpu_ids; i++) {
char name[60];
if (!cpu_online(i))
continue;
scnprintf(name, sizeof(name), "sgv-dma-%d", i);
sgv_dma_pool_per_cpu[i] = sgv_pool_create_node(name,
sgv_no_clustering, 0, false, 0, cpu_to_node(i));
if (sgv_dma_pool_per_cpu[i] == NULL)
goto out_free_per_cpu_dma;
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
sgv_shrinker = set_shrinker(DEFAULT_SEEKS, sgv_shrink);
#else
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
sgv_shrinker.count_objects = sgv_can_be_shrunk;
sgv_shrinker.scan_objects = sgv_scan_shrink;
#else
sgv_shrinker.shrink = sgv_shrink;
#endif
sgv_shrinker.seeks = DEFAULT_SEEKS;
register_shrinker(&sgv_shrinker);
#endif
out:
TRACE_EXIT_RES(res);
return res;
out_free_per_cpu_dma:
for (i = 0; i < nr_cpu_ids; i++)
if (sgv_dma_pool_per_cpu[i] != NULL)
sgv_pool_destroy(sgv_dma_pool_per_cpu[i]);
out_free_per_cpu_clust:
for (i = 0; i < nr_cpu_ids; i++)
if (sgv_norm_clust_pool_per_cpu[i] != NULL)
sgv_pool_destroy(sgv_norm_clust_pool_per_cpu[i]);
out_free_per_cpu_norm:
for (i = 0; i < nr_cpu_ids; i++)
if (sgv_norm_pool_per_cpu[i] != NULL)
sgv_pool_destroy(sgv_norm_pool_per_cpu[i]);
sgv_pool_destroy(sgv_dma_pool_main);
out_free_clust:
sgv_pool_destroy(sgv_norm_clust_pool_main);
out_free_norm:
sgv_pool_destroy(sgv_norm_pool_main);
out_free_pool:
kmem_cache_destroy(sgv_pool_cachep);
out_err:
res = -ENOMEM;
goto out;
}
void scst_sgv_pools_deinit(void)
{
int i;
TRACE_ENTRY();
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
remove_shrinker(sgv_shrinker);
#else
unregister_shrinker(&sgv_shrinker);
#endif
sgv_pool_destroy(sgv_dma_pool_main);
for (i = 0; i < nr_cpu_ids; i++)
if (sgv_dma_pool_per_cpu[i] != NULL)
sgv_pool_destroy(sgv_dma_pool_per_cpu[i]);
sgv_pool_destroy(sgv_norm_pool_main);
for (i = 0; i < nr_cpu_ids; i++)
if (sgv_norm_pool_per_cpu[i] != NULL)
sgv_pool_destroy(sgv_norm_pool_per_cpu[i]);
sgv_pool_destroy(sgv_norm_clust_pool_main);
for (i = 0; i < nr_cpu_ids; i++)
if (sgv_norm_clust_pool_per_cpu[i] != NULL)
sgv_pool_destroy(sgv_norm_clust_pool_per_cpu[i]);
for (i = 0; i < nr_cpu_ids; i++)
sgv_norm_pool_global[i] = NULL;
for (i = 0; i < nr_cpu_ids; i++)
sgv_norm_clust_pool_global[i] = NULL;
for (i = 0; i < nr_cpu_ids; i++)
sgv_dma_pool_global[i] = NULL;
kmem_cache_destroy(sgv_pool_cachep);
TRACE_EXIT();
return;
}
static ssize_t sgv_sysfs_stat_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct sgv_pool *pool;
int i, total = 0, hit = 0, merged = 0, allocated = 0;
int oa, om, res;
pool = container_of(kobj, struct sgv_pool, sgv_kobj);
for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
int t;
hit += atomic_read(&pool->cache_acc[i].hit_alloc);
total += atomic_read(&pool->cache_acc[i].total_alloc);
t = atomic_read(&pool->cache_acc[i].total_alloc) -
atomic_read(&pool->cache_acc[i].hit_alloc);
allocated += t * (1 << i);
merged += atomic_read(&pool->cache_acc[i].merged);
}
res = sprintf(buf, "%-30s %-11s %-11s %-11s %-11s", "Name", "Hit", "Total",
"% merged", "Cached (P/I/O)");
res += sprintf(&buf[res], "\n%-30s %-11d %-11d %-11d %d/%d/%d\n",
pool->name, hit, total,
(allocated != 0) ? merged*100/allocated : 0,
pool->cached_pages, pool->inactive_cached_pages,
pool->cached_entries);
for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
int t = atomic_read(&pool->cache_acc[i].total_alloc) -
atomic_read(&pool->cache_acc[i].hit_alloc);
allocated = t * (1 << i);
merged = atomic_read(&pool->cache_acc[i].merged);
res += sprintf(&buf[res], " %-28s %-11d %-11d %d\n",
pool->cache_names[i],
atomic_read(&pool->cache_acc[i].hit_alloc),
atomic_read(&pool->cache_acc[i].total_alloc),
(allocated != 0) ? merged*100/allocated : 0);
}
allocated = atomic_read(&pool->big_pages);
merged = atomic_read(&pool->big_merged);
oa = atomic_read(&pool->other_pages);
om = atomic_read(&pool->other_merged);
res += sprintf(&buf[res], " %-40s %d/%-9d %d/%d\n", "big/other",
atomic_read(&pool->big_alloc), atomic_read(&pool->other_alloc),
(allocated != 0) ? merged*100/allocated : 0,
(oa != 0) ? om*100/oa : 0);
return res;
}
static ssize_t sgv_sysfs_stat_reset(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t count)
{
struct sgv_pool *pool;
int i;
TRACE_ENTRY();
pool = container_of(kobj, struct sgv_pool, sgv_kobj);
for (i = 0; i < SGV_POOL_ELEMENTS; i++) {
atomic_set(&pool->cache_acc[i].hit_alloc, 0);
atomic_set(&pool->cache_acc[i].total_alloc, 0);
atomic_set(&pool->cache_acc[i].merged, 0);
}
atomic_set(&pool->big_pages, 0);
atomic_set(&pool->big_merged, 0);
atomic_set(&pool->big_alloc, 0);
atomic_set(&pool->other_pages, 0);
atomic_set(&pool->other_merged, 0);
atomic_set(&pool->other_alloc, 0);
PRINT_INFO("Statistics for SGV pool %s reset", pool->name);
TRACE_EXIT_RES(count);
return count;
}
static ssize_t sgv_sysfs_global_stat_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct sgv_pool *pool;
int inactive_pages = 0, res;
TRACE_ENTRY();
spin_lock_bh(&sgv_pools_lock);
list_for_each_entry(pool, &sgv_pools_list, sgv_pools_list_entry) {
inactive_pages += pool->inactive_cached_pages;
}
spin_unlock_bh(&sgv_pools_lock);
#ifdef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
res = sprintf(buf, "%-42s %d\n", "Inactive pages", inactive_pages);
#else
res = sprintf(buf, "%-42s %d/%d\n%-42s %d/%d\n%-42s %d/%d\n"
"%-42s %-11d\n",
"Inactive/active pages", inactive_pages,
atomic_read(&sgv_pages_total) - inactive_pages,
"Hi/lo watermarks [pages]", sgv_hi_wmk, sgv_lo_wmk,
"Hi watermark releases/failures",
atomic_read(&sgv_releases_on_hiwmk),
atomic_read(&sgv_releases_on_hiwmk_failed),
"Other allocs", atomic_read(&sgv_other_total_alloc));
#endif
TRACE_EXIT();
return res;
}
static ssize_t sgv_sysfs_global_stat_reset(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t count)
{
TRACE_ENTRY();
atomic_set(&sgv_releases_on_hiwmk, 0);
atomic_set(&sgv_releases_on_hiwmk_failed, 0);
#ifndef CONFIG_SCST_NO_TOTAL_MEM_CHECKS
atomic_set(&sgv_other_total_alloc, 0);
#endif
PRINT_INFO("%s", "Global SGV pool statistics reset");
TRACE_EXIT_RES(count);
return count;
}
static struct kobj_attribute sgv_stat_attr =
__ATTR(stats, S_IRUGO | S_IWUSR, sgv_sysfs_stat_show,
sgv_sysfs_stat_reset);
static struct attribute *sgv_attrs[] = {
&sgv_stat_attr.attr,
NULL,
};
static void sgv_kobj_release(struct kobject *kobj)
{
struct sgv_pool *pool;
TRACE_ENTRY();
pool = container_of(kobj, struct sgv_pool, sgv_kobj);
if (pool->sgv_kobj_release_cmpl != NULL)
complete_all(pool->sgv_kobj_release_cmpl);
TRACE_EXIT();
return;
}
static struct kobj_type sgv_pool_ktype = {
.sysfs_ops = &scst_sysfs_ops,
.release = sgv_kobj_release,
.default_attrs = sgv_attrs,
};
static int scst_sgv_sysfs_create(struct sgv_pool *pool)
{
int res;
TRACE_ENTRY();
res = kobject_init_and_add(&pool->sgv_kobj, &sgv_pool_ktype,
scst_sgv_kobj, pool->name);
if (res != 0) {
PRINT_ERROR("Can't add sgv pool %s to sysfs", pool->name);
goto out;
}
out:
TRACE_EXIT_RES(res);
return res;
}
static void scst_sgv_sysfs_del(struct sgv_pool *pool)
{
DECLARE_COMPLETION_ONSTACK(c);
TRACE_ENTRY();
pool->sgv_kobj_release_cmpl = &c;
kobject_del(&pool->sgv_kobj);
SCST_KOBJECT_PUT_AND_WAIT(&pool->sgv_kobj, "SGV pool", &c,
&scst_pool_dep_map);
TRACE_EXIT();
}
static struct kobj_attribute sgv_global_stat_attr =
__ATTR(global_stats, S_IRUGO | S_IWUSR, sgv_sysfs_global_stat_show,
sgv_sysfs_global_stat_reset);
static struct attribute *sgv_default_attrs[] = {
&sgv_global_stat_attr.attr,
NULL,
};
static void scst_sysfs_release(struct kobject *kobj)
{
kfree(kobj);
}
static struct kobj_type sgv_ktype = {
.sysfs_ops = &scst_sysfs_ops,
.release = scst_sysfs_release,
.default_attrs = sgv_default_attrs,
};
/*
* scst_add_sgv_kobj() - Initialize and add the root SGV kernel object.
*/
int scst_add_sgv_kobj(struct kobject *parent, const char *name)
{
int res;
WARN_ON(scst_sgv_kobj);
res = -ENOMEM;
scst_sgv_kobj = kzalloc(sizeof(*scst_sgv_kobj), GFP_KERNEL);
if (!scst_sgv_kobj)
goto out;
res = kobject_init_and_add(scst_sgv_kobj, &sgv_ktype, parent, name);
if (res != 0)
goto out_free;
out:
return res;
out_free:
kobject_put(scst_sgv_kobj);
scst_sgv_kobj = NULL;
goto out;
}
/**
* scst_del_put_sgv_kobj() - Remove the root SGV kernel object.
*/
void scst_del_put_sgv_kobj(void)
{
WARN_ON(!scst_sgv_kobj);
kobject_del(scst_sgv_kobj);
kobject_put(scst_sgv_kobj);
scst_sgv_kobj = NULL;
}