| /* |
| * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. |
| * |
| * This software is available to you under a choice of one of two |
| * licenses. You may choose to be licensed under the terms of the GNU |
| * General Public License (GPL) Version 2, available from the file |
| * COPYING in the main directory of this source tree, or the |
| * OpenIB.org BSD license below: |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials |
| * provided with the distribution. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <config.h> |
| |
| #include <signal.h> |
| #include <sys/ipc.h> |
| #include <sys/shm.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <errno.h> |
| |
| #include "mlx5.h" |
| #include "bitmap.h" |
| |
| static int mlx5_bitmap_init(struct mlx5_bitmap *bmp, uint32_t num, |
| uint32_t mask) |
| { |
| bmp->last = 0; |
| bmp->top = 0; |
| bmp->max = num; |
| bmp->avail = num; |
| bmp->mask = mask; |
| bmp->avail = bmp->max; |
| bmp->table = calloc(BITS_TO_LONGS(bmp->max), sizeof(*bmp->table)); |
| if (!bmp->table) |
| return -ENOMEM; |
| |
| return 0; |
| } |
| |
| static void bitmap_free_range(struct mlx5_bitmap *bmp, uint32_t obj, |
| int cnt) |
| { |
| int i; |
| |
| obj &= bmp->max - 1; |
| |
| for (i = 0; i < cnt; i++) |
| mlx5_clear_bit(obj + i, bmp->table); |
| bmp->last = min(bmp->last, obj); |
| bmp->top = (bmp->top + bmp->max) & bmp->mask; |
| bmp->avail += cnt; |
| } |
| |
| static int mlx5_bitmap_empty(struct mlx5_bitmap *bmp) |
| { |
| return (bmp->avail == bmp->max) ? 1 : 0; |
| } |
| |
| static int bitmap_avail(struct mlx5_bitmap *bmp) |
| { |
| return bmp->avail; |
| } |
| |
| static void mlx5_bitmap_cleanup(struct mlx5_bitmap *bmp) |
| { |
| if (bmp->table) |
| free(bmp->table); |
| } |
| |
| static void free_huge_mem(struct mlx5_hugetlb_mem *hmem) |
| { |
| mlx5_bitmap_cleanup(&hmem->bitmap); |
| if (shmdt(hmem->shmaddr) == -1) |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); |
| shmctl(hmem->shmid, IPC_RMID, NULL); |
| free(hmem); |
| } |
| |
| static int mlx5_bitmap_alloc(struct mlx5_bitmap *bmp) |
| { |
| uint32_t obj; |
| int ret; |
| |
| obj = mlx5_find_first_zero_bit(bmp->table, bmp->max); |
| if (obj < bmp->max) { |
| mlx5_set_bit(obj, bmp->table); |
| bmp->last = (obj + 1); |
| if (bmp->last == bmp->max) |
| bmp->last = 0; |
| obj |= bmp->top; |
| ret = obj; |
| } else |
| ret = -1; |
| |
| if (ret != -1) |
| --bmp->avail; |
| |
| return ret; |
| } |
| |
| static uint32_t find_aligned_range(unsigned long *bmp, |
| uint32_t start, uint32_t nbits, |
| int len, int alignment) |
| { |
| uint32_t end, i; |
| |
| again: |
| start = align(start, alignment); |
| |
| while ((start < nbits) && mlx5_test_bit(start, bmp)) |
| start += alignment; |
| |
| if (start >= nbits) |
| return -1; |
| |
| end = start + len; |
| if (end > nbits) |
| return -1; |
| |
| for (i = start + 1; i < end; i++) { |
| if (mlx5_test_bit(i, bmp)) { |
| start = i + 1; |
| goto again; |
| } |
| } |
| |
| return start; |
| } |
| |
| static int bitmap_alloc_range(struct mlx5_bitmap *bmp, int cnt, |
| int align) |
| { |
| uint32_t obj; |
| int ret, i; |
| |
| if (cnt == 1 && align == 1) |
| return mlx5_bitmap_alloc(bmp); |
| |
| if (cnt > bmp->max) |
| return -1; |
| |
| obj = find_aligned_range(bmp->table, bmp->last, |
| bmp->max, cnt, align); |
| if (obj >= bmp->max) { |
| bmp->top = (bmp->top + bmp->max) & bmp->mask; |
| obj = find_aligned_range(bmp->table, 0, bmp->max, |
| cnt, align); |
| } |
| |
| if (obj < bmp->max) { |
| for (i = 0; i < cnt; i++) |
| mlx5_set_bit(obj + i, bmp->table); |
| if (obj == bmp->last) { |
| bmp->last = (obj + cnt); |
| if (bmp->last >= bmp->max) |
| bmp->last = 0; |
| } |
| obj |= bmp->top; |
| ret = obj; |
| } else |
| ret = -1; |
| |
| if (ret != -1) |
| bmp->avail -= cnt; |
| |
| return obj; |
| } |
| |
| static struct mlx5_hugetlb_mem *alloc_huge_mem(size_t size) |
| { |
| struct mlx5_hugetlb_mem *hmem; |
| size_t shm_len; |
| |
| hmem = malloc(sizeof(*hmem)); |
| if (!hmem) |
| return NULL; |
| |
| shm_len = align(size, MLX5_SHM_LENGTH); |
| hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); |
| if (hmem->shmid == -1) { |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); |
| goto out_free; |
| } |
| |
| hmem->shmaddr = shmat(hmem->shmid, MLX5_SHM_ADDR, MLX5_SHMAT_FLAGS); |
| if (hmem->shmaddr == (void *)-1) { |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); |
| goto out_rmid; |
| } |
| |
| if (mlx5_bitmap_init(&hmem->bitmap, shm_len / MLX5_Q_CHUNK_SIZE, |
| shm_len / MLX5_Q_CHUNK_SIZE - 1)) { |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); |
| goto out_shmdt; |
| } |
| |
| /* |
| * Marked to be destroyed when process detaches from shmget segment |
| */ |
| shmctl(hmem->shmid, IPC_RMID, NULL); |
| |
| return hmem; |
| |
| out_shmdt: |
| if (shmdt(hmem->shmaddr) == -1) |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); |
| |
| out_rmid: |
| shmctl(hmem->shmid, IPC_RMID, NULL); |
| |
| out_free: |
| free(hmem); |
| return NULL; |
| } |
| |
| static int alloc_huge_buf(struct mlx5_context *mctx, struct mlx5_buf *buf, |
| size_t size, int page_size) |
| { |
| int found = 0; |
| int nchunk; |
| struct mlx5_hugetlb_mem *hmem; |
| int ret; |
| |
| buf->length = align(size, MLX5_Q_CHUNK_SIZE); |
| nchunk = buf->length / MLX5_Q_CHUNK_SIZE; |
| |
| if (!nchunk) |
| return 0; |
| |
| mlx5_spin_lock(&mctx->hugetlb_lock); |
| list_for_each(&mctx->hugetlb_list, hmem, entry) { |
| if (bitmap_avail(&hmem->bitmap)) { |
| buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); |
| if (buf->base != -1) { |
| buf->hmem = hmem; |
| found = 1; |
| break; |
| } |
| } |
| } |
| mlx5_spin_unlock(&mctx->hugetlb_lock); |
| |
| if (!found) { |
| hmem = alloc_huge_mem(buf->length); |
| if (!hmem) |
| return -1; |
| |
| buf->base = bitmap_alloc_range(&hmem->bitmap, nchunk, 1); |
| if (buf->base == -1) { |
| free_huge_mem(hmem); |
| return -1; |
| } |
| |
| buf->hmem = hmem; |
| |
| mlx5_spin_lock(&mctx->hugetlb_lock); |
| if (bitmap_avail(&hmem->bitmap)) |
| list_add(&mctx->hugetlb_list, &hmem->entry); |
| else |
| list_add_tail(&mctx->hugetlb_list, &hmem->entry); |
| mlx5_spin_unlock(&mctx->hugetlb_lock); |
| } |
| |
| buf->buf = hmem->shmaddr + buf->base * MLX5_Q_CHUNK_SIZE; |
| |
| ret = ibv_dontfork_range(buf->buf, buf->length); |
| if (ret) { |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "\n"); |
| goto out_fork; |
| } |
| buf->type = MLX5_ALLOC_TYPE_HUGE; |
| |
| return 0; |
| |
| out_fork: |
| mlx5_spin_lock(&mctx->hugetlb_lock); |
| bitmap_free_range(&hmem->bitmap, buf->base, nchunk); |
| if (mlx5_bitmap_empty(&hmem->bitmap)) { |
| list_del(&hmem->entry); |
| mlx5_spin_unlock(&mctx->hugetlb_lock); |
| free_huge_mem(hmem); |
| } else |
| mlx5_spin_unlock(&mctx->hugetlb_lock); |
| |
| return -1; |
| } |
| |
| static void free_huge_buf(struct mlx5_context *ctx, struct mlx5_buf *buf) |
| { |
| int nchunk; |
| |
| nchunk = buf->length / MLX5_Q_CHUNK_SIZE; |
| if (!nchunk) |
| return; |
| |
| mlx5_spin_lock(&ctx->hugetlb_lock); |
| bitmap_free_range(&buf->hmem->bitmap, buf->base, nchunk); |
| if (mlx5_bitmap_empty(&buf->hmem->bitmap)) { |
| list_del(&buf->hmem->entry); |
| mlx5_spin_unlock(&ctx->hugetlb_lock); |
| free_huge_mem(buf->hmem); |
| } else |
| mlx5_spin_unlock(&ctx->hugetlb_lock); |
| } |
| |
| void mlx5_free_buf_extern(struct mlx5_context *ctx, struct mlx5_buf *buf) |
| { |
| ibv_dofork_range(buf->buf, buf->length); |
| ctx->extern_alloc.free(buf->buf, ctx->extern_alloc.data); |
| } |
| |
| int mlx5_alloc_buf_extern(struct mlx5_context *ctx, struct mlx5_buf *buf, |
| size_t size) |
| { |
| void *addr; |
| |
| addr = ctx->extern_alloc.alloc(size, ctx->extern_alloc.data); |
| if (addr || size == 0) { |
| if (ibv_dontfork_range(addr, size)) { |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, |
| "External mode dontfork_range failed\n"); |
| ctx->extern_alloc.free(addr, |
| ctx->extern_alloc.data); |
| return -1; |
| } |
| buf->buf = addr; |
| buf->length = size; |
| buf->type = MLX5_ALLOC_TYPE_EXTERNAL; |
| return 0; |
| } |
| |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, "External alloc failed\n"); |
| return -1; |
| } |
| |
| static void mlx5_free_buf_custom(struct mlx5_context *ctx, |
| struct mlx5_buf *buf) |
| { |
| struct mlx5_parent_domain *mparent_domain = buf->mparent_domain; |
| |
| mparent_domain->free(&mparent_domain->mpd.ibv_pd, |
| mparent_domain->pd_context, |
| buf->buf, |
| buf->resource_type); |
| } |
| |
| static int mlx5_alloc_buf_custom(struct mlx5_context *ctx, |
| struct mlx5_buf *buf, size_t size) |
| { |
| struct mlx5_parent_domain *mparent_domain = buf->mparent_domain; |
| void *addr; |
| |
| addr = mparent_domain->alloc(&mparent_domain->mpd.ibv_pd, |
| mparent_domain->pd_context, size, |
| buf->req_alignment, |
| buf->resource_type); |
| if (addr == IBV_ALLOCATOR_USE_DEFAULT) |
| return 1; |
| |
| if (addr || size == 0) { |
| buf->buf = addr; |
| buf->length = size; |
| buf->type = MLX5_ALLOC_TYPE_CUSTOM; |
| return 0; |
| } |
| |
| return -1; |
| } |
| |
| int mlx5_alloc_prefered_buf(struct mlx5_context *mctx, |
| struct mlx5_buf *buf, |
| size_t size, int page_size, |
| enum mlx5_alloc_type type, |
| const char *component) |
| { |
| int ret; |
| |
| if (type == MLX5_ALLOC_TYPE_CUSTOM) { |
| ret = mlx5_alloc_buf_custom(mctx, buf, size); |
| if (ret <= 0) |
| return ret; |
| |
| /* Fallback - default allocation is required */ |
| } |
| |
| /* |
| * Fallback mechanism priority: |
| * huge pages |
| * contig pages |
| * default |
| */ |
| if (type == MLX5_ALLOC_TYPE_HUGE || |
| type == MLX5_ALLOC_TYPE_PREFER_HUGE || |
| type == MLX5_ALLOC_TYPE_ALL) { |
| ret = alloc_huge_buf(mctx, buf, size, page_size); |
| if (!ret) |
| return 0; |
| |
| if (type == MLX5_ALLOC_TYPE_HUGE) |
| return -1; |
| |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, |
| "Huge mode allocation failed, fallback to %s mode\n", |
| MLX5_ALLOC_TYPE_ALL ? "contig" : "default"); |
| } |
| |
| if (type == MLX5_ALLOC_TYPE_CONTIG || |
| type == MLX5_ALLOC_TYPE_PREFER_CONTIG || |
| type == MLX5_ALLOC_TYPE_ALL) { |
| ret = mlx5_alloc_buf_contig(mctx, buf, size, page_size, component); |
| if (!ret) |
| return 0; |
| |
| if (type == MLX5_ALLOC_TYPE_CONTIG) |
| return -1; |
| mlx5_dbg(stderr, MLX5_DBG_CONTIG, |
| "Contig allocation failed, fallback to default mode\n"); |
| } |
| |
| if (type == MLX5_ALLOC_TYPE_EXTERNAL) |
| return mlx5_alloc_buf_extern(mctx, buf, size); |
| |
| return mlx5_alloc_buf(buf, size, page_size); |
| |
| } |
| |
| int mlx5_free_actual_buf(struct mlx5_context *ctx, struct mlx5_buf *buf) |
| { |
| int err = 0; |
| |
| switch (buf->type) { |
| case MLX5_ALLOC_TYPE_ANON: |
| mlx5_free_buf(buf); |
| break; |
| |
| case MLX5_ALLOC_TYPE_HUGE: |
| free_huge_buf(ctx, buf); |
| break; |
| |
| case MLX5_ALLOC_TYPE_CONTIG: |
| mlx5_free_buf_contig(ctx, buf); |
| break; |
| |
| case MLX5_ALLOC_TYPE_EXTERNAL: |
| mlx5_free_buf_extern(ctx, buf); |
| break; |
| |
| case MLX5_ALLOC_TYPE_CUSTOM: |
| mlx5_free_buf_custom(ctx, buf); |
| break; |
| |
| default: |
| mlx5_err(ctx->dbg_fp, "Bad allocation type\n"); |
| } |
| |
| return err; |
| } |
| |
| /* This function computes log2(v) rounded up. |
| We don't want to have a dependency to libm which exposes ceil & log2 APIs. |
| Code was written based on public domain code: |
| URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. |
| */ |
| static uint32_t mlx5_get_block_order(uint32_t v) |
| { |
| static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; |
| static const uint32_t shift_arr[] = {1, 2, 4, 8, 16}; |
| int i; |
| uint32_t input_val = v; |
| |
| register uint32_t r = 0;/* result of log2(v) will go here */ |
| for (i = 4; i >= 0; i--) { |
| if (v & bits_arr[i]) { |
| v >>= shift_arr[i]; |
| r |= shift_arr[i]; |
| } |
| } |
| /* Rounding up if required */ |
| r += !!(input_val & ((1 << r) - 1)); |
| |
| return r; |
| } |
| |
| bool mlx5_is_custom_alloc(struct ibv_pd *pd) |
| { |
| struct mlx5_parent_domain *mparent_domain = to_mparent_domain(pd); |
| |
| return (mparent_domain && mparent_domain->alloc && mparent_domain->free); |
| } |
| |
| bool mlx5_is_extern_alloc(struct mlx5_context *context) |
| { |
| return context->extern_alloc.alloc && context->extern_alloc.free; |
| } |
| |
| void mlx5_get_alloc_type(struct mlx5_context *context, |
| struct ibv_pd *pd, |
| const char *component, |
| enum mlx5_alloc_type *alloc_type, |
| enum mlx5_alloc_type default_type) |
| |
| { |
| char *env_value; |
| char name[128]; |
| |
| if (mlx5_is_custom_alloc(pd)) { |
| *alloc_type = MLX5_ALLOC_TYPE_CUSTOM; |
| return; |
| } |
| |
| if (mlx5_is_extern_alloc(context)) { |
| *alloc_type = MLX5_ALLOC_TYPE_EXTERNAL; |
| return; |
| } |
| |
| snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); |
| |
| *alloc_type = default_type; |
| |
| env_value = getenv(name); |
| if (env_value) { |
| if (!strcasecmp(env_value, "ANON")) |
| *alloc_type = MLX5_ALLOC_TYPE_ANON; |
| else if (!strcasecmp(env_value, "HUGE")) |
| *alloc_type = MLX5_ALLOC_TYPE_HUGE; |
| else if (!strcasecmp(env_value, "CONTIG")) |
| *alloc_type = MLX5_ALLOC_TYPE_CONTIG; |
| else if (!strcasecmp(env_value, "PREFER_CONTIG")) |
| *alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG; |
| else if (!strcasecmp(env_value, "PREFER_HUGE")) |
| *alloc_type = MLX5_ALLOC_TYPE_PREFER_HUGE; |
| else if (!strcasecmp(env_value, "ALL")) |
| *alloc_type = MLX5_ALLOC_TYPE_ALL; |
| } |
| } |
| |
| static void mlx5_alloc_get_env_info(struct mlx5_context *mctx, |
| int *max_block_log, |
| int *min_block_log, |
| const char *component) |
| |
| { |
| char *env; |
| int value; |
| char name[128]; |
| |
| /* First set defaults */ |
| *max_block_log = MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE; |
| *min_block_log = MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE; |
| |
| snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component); |
| env = getenv(name); |
| if (env) { |
| value = atoi(env); |
| if (value <= MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE && |
| value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE) |
| *max_block_log = value; |
| else |
| mlx5_err(mctx->dbg_fp, "Invalid value %d for %s\n", |
| value, name); |
| } |
| sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component); |
| env = getenv(name); |
| if (env) { |
| value = atoi(env); |
| if (value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE && |
| value <= *max_block_log) |
| *min_block_log = value; |
| else |
| mlx5_err(mctx->dbg_fp, "Invalid value %d for %s\n", |
| value, name); |
| } |
| } |
| |
| int mlx5_alloc_buf_contig(struct mlx5_context *mctx, |
| struct mlx5_buf *buf, size_t size, |
| int page_size, |
| const char *component) |
| { |
| void *addr = MAP_FAILED; |
| int block_size_exp; |
| int max_block_log; |
| int min_block_log; |
| struct ibv_context *context = &mctx->ibv_ctx.context; |
| off_t offset; |
| |
| mlx5_alloc_get_env_info(mctx, &max_block_log, |
| &min_block_log, |
| component); |
| |
| block_size_exp = mlx5_get_block_order(size); |
| |
| if (block_size_exp > max_block_log) |
| block_size_exp = max_block_log; |
| |
| do { |
| offset = 0; |
| set_command(MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES, &offset); |
| set_order(block_size_exp, &offset); |
| addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED, |
| context->cmd_fd, page_size * offset); |
| if (addr != MAP_FAILED) |
| break; |
| |
| /* |
| * The kernel returns EINVAL if not supported |
| */ |
| if (errno == EINVAL) |
| return -1; |
| |
| block_size_exp -= 1; |
| } while (block_size_exp >= min_block_log); |
| mlx5_dbg(mctx->dbg_fp, MLX5_DBG_CONTIG, "block order %d, addr %p\n", |
| block_size_exp, addr); |
| |
| if (addr == MAP_FAILED) |
| return -1; |
| |
| if (ibv_dontfork_range(addr, size)) { |
| munmap(addr, size); |
| return -1; |
| } |
| |
| buf->buf = addr; |
| buf->length = size; |
| buf->type = MLX5_ALLOC_TYPE_CONTIG; |
| |
| return 0; |
| } |
| |
| void mlx5_free_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf) |
| { |
| ibv_dofork_range(buf->buf, buf->length); |
| munmap(buf->buf, buf->length); |
| } |
| |
| int mlx5_alloc_buf(struct mlx5_buf *buf, size_t size, int page_size) |
| { |
| int ret; |
| int al_size; |
| |
| al_size = align(size, page_size); |
| ret = posix_memalign(&buf->buf, page_size, al_size); |
| if (ret) |
| return ret; |
| |
| ret = ibv_dontfork_range(buf->buf, al_size); |
| if (ret) |
| free(buf->buf); |
| |
| if (!ret) { |
| buf->length = al_size; |
| buf->type = MLX5_ALLOC_TYPE_ANON; |
| } |
| |
| return ret; |
| } |
| |
| void mlx5_free_buf(struct mlx5_buf *buf) |
| { |
| ibv_dofork_range(buf->buf, buf->length); |
| free(buf->buf); |
| } |