GIT 0a604d99af051d3832a11189da1a85e13294ebea git+ssh://master.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git#for-mm commit 3f1244a2f8d3892f991b662cea49b2a0b4e0c115 Author: Or Gerlitz Date: Thu May 11 10:03:30 2006 +0300 IB/iser: iSER Kconfig and Makefile Kconfig and Makefile for iSER. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier commit 6461f64ab51e6929680df064b2682004a1548290 Author: Or Gerlitz Date: Thu May 11 10:03:08 2006 +0300 IB/iser: iSER handling of memory for RDMA This file contains the processing carried over an SG list associated with a SCSI command such that it can be registered with the IB verbs. The registration produces a network virtual address (VA) and a remote access key (RKEY or STAG in iSER spec notation) which are used by the target for its RDMA operation. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier commit 1cfa0a75dbef1d5bf687aacafabb023288f6b36a Author: Or Gerlitz Date: Thu May 11 10:02:46 2006 +0300 IB/iser: iSER RDMA CM (CMA) and IB verbs interaction This file contains the low level interaction with the RDMA CM and the IB verbs, where iSER is consumer of both. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier commit e85b24b5e7de9f507c6253183d089370f37618c5 Author: Or Gerlitz Date: Thu May 11 10:02:19 2006 +0300 IB/iser: iSER initiator iSCSI PDU and TX/RX This file contains the iSER initiator processing of iSCSI PDUs - controls, commands and data-outs along with processing of TX and RX completions. It interacts with the lower level iser code doing the memory registration and and the cma and verbs calls. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier commit 65e7ae7bfc71219f13162b3bbad44e6471cd67f9 Author: Or Gerlitz Date: Thu May 11 10:00:44 2006 +0300 IB/iser: iSCSI iSER transport provider high level code This file contains the code that registeres with the iscsi transport manager and with the SCSI Mid Layer, where much of the provided functions to iSCSI and SCSI are implemented in libiscsi. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier commit 49cd5382f629bde2aee9f817cefb271106dc47ee Author: Or Gerlitz Date: Thu May 11 10:00:21 2006 +0300 IB/iser: iSCSI iSER transport provider header file iSER (iSCSI Extensions for RDMA) transport provider driver for the iSCSI initiator, whose other parts (under drivers/scsi) are scsi_transport_iscsi - the transport management module, iscsi_tcp - the TCP transport provider module and libiscsi - a kernel library (module) implementing functionality needed by both TCP and iSER transports. iSER is both a provider of the iSCSI transport api and a SCSI low level driver. This file contains internal data structures and non static service functions. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier commit d02f4851a36491c238e1e03592d05324453810fa Author: Sean Hefty Date: Mon Mar 6 15:41:14 2006 -0800 IB: userspace support for RDMA connection manager Kernel component necessary to support the userspace RDMA connection management library. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier commit bae44d4b0159ca1ce7e6398a92fbf5e878918d59 Author: Heiko J Schick Date: Thu Jun 22 07:49:38 2006 -0700 IB/ehca: build_system Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 828becd828fc63d47c602caca47ecd50ed2fe3d0 Author: Heiko J Schick Date: Thu Jun 22 07:49:38 2006 -0700 IB/ehca: phyp Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 16bc5356eebe330a07e86a088d8680e1e2865c78 Author: Heiko J Schick Date: Thu Jun 22 07:49:37 2006 -0700 IB/ehca: ipz Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 3008fac64a7b03c2931506521780103f32978c9c Author: Heiko J Schick Date: Thu Jun 22 07:49:37 2006 -0700 IB/ehca: hipz Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 0ec7dfc058cc746084063485640f65b91a83fe85 Author: Heiko J Schick Date: Thu Jun 22 07:49:37 2006 -0700 IB/ehca: fwif Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 34f7af69044c912db8241196ae88630424e2120a Author: Heiko J Schick Date: Thu Jun 22 07:49:37 2006 -0700 IB/ehca: qp Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 42e662806456498dcb83eda4e042e04feaa0e907 Author: Heiko J Schick Date: Thu Jun 22 07:49:36 2006 -0700 IB/ehca: cq Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 4925c2b22ec184f9d3c54cb002222780a73f96c6 Author: Heiko J Schick Date: Thu Jun 22 07:49:36 2006 -0700 IB/ehca: eq Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 208284d9da164dbceb4813100187af1eacd4812e Author: Heiko J Schick Date: Thu Jun 22 07:49:36 2006 -0700 IB/ehca: avpd Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 247f413d4b3bd920b46985d894d5e6b2e4c15765 Author: Heiko J Schick Date: Thu Jun 22 07:49:36 2006 -0700 IB/ehca: mrmw Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 9c953954a33581d02673f7b149f29cf1715f6f74 Author: Heiko J Schick Date: Thu Jun 22 07:49:35 2006 -0700 IB/ehca: irq Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit b2aab60197f1394f3fc15d840cb3c6bbd3353fa0 Author: Heiko J Schick Date: Thu Jun 22 07:49:35 2006 -0700 IB/ehca: includes Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit fe34e6e4c84c6aae22059bcc83d9a8902a09d81d Author: Heiko J Schick Date: Thu Jun 22 07:49:35 2006 -0700 IB/ehca: hca Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit ac475aa35a6c99036ef9fb3b03f73d9667d49d7a Author: Heiko J Schick Date: Thu Jun 22 07:49:34 2006 -0700 IB/ehca: uverbs Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit d0d7e3c063ba9cc53e84778bf8e0f59ffdedc5b5 Author: Heiko J Schick Date: Thu Jun 22 07:49:34 2006 -0700 IB/ehca: classes Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 79be70269912e7fc96485122be1dde8c4f71ab51 Author: Heiko J Schick Date: Thu Jun 22 07:49:34 2006 -0700 IB/ehca: main Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier commit 9b8efc0242777353d5d62bfaa7766ebf0b5598b4 Author: Roland Dreier Date: Thu Jun 22 07:47:27 2006 -0700 IB/uverbs: Remove unnecessary list_del()s In ib_uverbs_cleanup_ucontext(), when iterating through the lists of objects, there's no reason to do list_del() to remove the objects, since both the objects and the lists that contain them are about to be freed anyway. Since list_del() is a moderately big inline function, getting rid of this extra work saves quite a bit of .text: add/remove: 0/0 grow/shrink: 1/2 up/down: 3/-217 (-214) function old new delta ib_uverbs_comp_handler 225 228 +3 ib_uverbs_async_handler 256 255 -1 ib_uverbs_close 905 689 -216 Signed-off-by: Roland Dreier commit 183208284e3ab3816189fc0e23faf29675ce6c9f Author: Krishna Kumar Date: Thu Jun 22 07:47:27 2006 -0700 IB/uverbs: Don't free wr list when it's known to be empty In ib_uverbs_post_send(), move the "out:" label after the loop that frees the list of work requests, since the only place that jumps there is before any work requests could possibly be added to the list. This removes a compile warning: "is_ud might be used uninitialized in this function". Signed-off-by: Krishna Kumar Signed-off-by: Roland Dreier --- diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index ba2d650..fd2d528 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -36,9 +36,12 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/ipath/Kconfig" +source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" source "drivers/infiniband/ulp/srp/Kconfig" +source "drivers/infiniband/ulp/iser/Kconfig" + endmenu diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index eea2732..893bee0 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ obj-$(CONFIG_IPATH_CORE) += hw/ipath/ +obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ +obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 68e73ec..21be457 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,9 +1,10 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o +user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ ib_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o -obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o +obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o @@ -16,6 +17,8 @@ ib_cm-y := cm.o rdma_cm-y := cma.o +rdma_ucm-y := ucma.o + ib_addr-y := addr.o ib_umad-y := user_mad.o diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c new file mode 100644 index 0000000..3e21f10 --- /dev/null +++ b/drivers/infiniband/core/ucma.c @@ -0,0 +1,811 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); +MODULE_LICENSE("Dual BSD/GPL"); + +enum { + UCMA_MAX_BACKLOG = 128 +}; + +struct ucma_file { + struct mutex file_mutex; + struct file *filp; + struct list_head ctxs; + struct list_head events; + wait_queue_head_t poll_wait; +}; + +struct ucma_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + int events_reported; + int backlog; + + struct ucma_file *file; + struct rdma_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ucma_event { + struct ucma_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ + struct rdma_cm_id *cm_id; + struct rdma_ucm_event_resp resp; +}; + +static DEFINE_MUTEX(ctx_mutex); +static DEFINE_IDR(ctx_idr); + +static struct ucma_context* ucma_get_ctx(struct ucma_file *file, int id) +{ + struct ucma_context *ctx; + + mutex_lock(&ctx_mutex); + ctx = idr_find(&ctx_idr, id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + atomic_inc(&ctx->ref); + mutex_unlock(&ctx_mutex); + + return ctx; +} + +static void ucma_put_ctx(struct ucma_context *ctx) +{ + if (atomic_dec_and_test(&ctx->ref)) + wake_up(&ctx->wait); +} + +static void ucma_cleanup_events(struct ucma_context *ctx) +{ + struct ucma_event *uevent; + + mutex_lock(&ctx->file->file_mutex); + list_del(&ctx->file_list); + while (!list_empty(&ctx->events)) { + + uevent = list_entry(ctx->events.next, struct ucma_event, + ctx_list); + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + + /* clear incoming connections. */ + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + rdma_destroy_id(uevent->cm_id); + + kfree(uevent); + } + mutex_unlock(&ctx->file->file_mutex); +} + +static struct ucma_context* ucma_alloc_ctx(struct ucma_file *file) +{ + struct ucma_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + atomic_set(&ctx->ref, 1); + init_waitqueue_head(&ctx->wait); + ctx->file = file; + INIT_LIST_HEAD(&ctx->events); + + do { + ret = idr_pre_get(&ctx_idr, GFP_KERNEL); + if (!ret) + goto error; + + mutex_lock(&ctx_mutex); + ret = idr_get_new(&ctx_idr, ctx, &ctx->id); + mutex_unlock(&ctx_mutex); + } while (ret == -EAGAIN); + + if (ret) + goto error; + + list_add_tail(&ctx->file_list, &file->ctxs); + return ctx; + +error: + kfree(ctx); + return NULL; +} + +static int ucma_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct ucma_event *uevent; + struct ucma_context *ctx = cm_id->context; + int ret = 0; + + uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) + return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; + + uevent->ctx = ctx; + uevent->cm_id = cm_id; + uevent->resp.uid = ctx->uid; + uevent->resp.id = ctx->id; + uevent->resp.event = event->event; + uevent->resp.status = event->status; + if ((uevent->resp.private_data_len = event->private_data_len)) + memcpy(uevent->resp.private_data, event->private_data, + event->private_data_len); + + mutex_lock(&ctx->file->file_mutex); + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { + if (!ctx->backlog) { + ret = -EDQUOT; + goto out; + } + ctx->backlog--; + } + list_add_tail(&uevent->file_list, &ctx->file->events); + list_add_tail(&uevent->ctx_list, &ctx->events); + wake_up_interruptible(&ctx->file->poll_wait); +out: + mutex_unlock(&ctx->file->file_mutex); + return ret; +} + +static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct ucma_context *ctx; + struct rdma_ucm_get_event cmd; + struct ucma_event *uevent; + int ret = 0; + DEFINE_WAIT(wait); + + if (out_len < sizeof(struct rdma_ucm_event_resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->file_mutex); + while (list_empty(&file->events)) { + if (file->filp->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); + mutex_unlock(&file->file_mutex); + schedule(); + mutex_lock(&file->file_mutex); + finish_wait(&file->poll_wait, &wait); + } + + if (ret) + goto done; + + uevent = list_entry(file->events.next, struct ucma_event, file_list); + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { + ctx = ucma_alloc_ctx(file); + if (!ctx) { + ret = -ENOMEM; + goto done; + } + uevent->ctx->backlog++; + ctx->cm_id = uevent->cm_id; + ctx->cm_id->context = ctx; + uevent->resp.id = ctx->id; + } + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &uevent->resp, sizeof(uevent->resp))) { + ret = -EFAULT; + goto done; + } + + list_del(&uevent->file_list); + list_del(&uevent->ctx_list); + uevent->ctx->events_reported++; + kfree(uevent); +done: + mutex_unlock(&file->file_mutex); + return ret; +} + +static ssize_t ucma_create_id(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_create_id cmd; + struct rdma_ucm_create_id_resp resp; + struct ucma_context *ctx; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&file->file_mutex); + ctx = ucma_alloc_ctx(file); + mutex_unlock(&file->file_mutex); + if (!ctx) + return -ENOMEM; + + ctx->uid = cmd.uid; + ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, RDMA_PS_TCP); + if (IS_ERR(ctx->cm_id)) { + ret = PTR_ERR(ctx->cm_id); + goto err1; + } + + resp.id = ctx->id; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err2; + } + return 0; + +err2: + rdma_destroy_id(ctx->cm_id); +err1: + mutex_lock(&ctx_mutex); + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&ctx_mutex); + kfree(ctx); + return ret; +} + +static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_destroy_id cmd; + struct rdma_ucm_destroy_id_resp resp; + struct ucma_context *ctx; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + mutex_lock(&ctx_mutex); + ctx = idr_find(&ctx_idr, cmd.id); + if (!ctx) + ctx = ERR_PTR(-ENOENT); + else if (ctx->file != file) + ctx = ERR_PTR(-EINVAL); + else + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&ctx_mutex); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + atomic_dec(&ctx->ref); + wait_event(ctx->wait, !atomic_read(&ctx->ref)); + + /* No new events will be generated after destroying the id. */ + rdma_destroy_id(ctx->cm_id); + /* Cleanup events not yet reported to the user. */ + ucma_cleanup_events(ctx); + + resp.events_reported = ctx->events_reported; + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + kfree(ctx); + return ret; +} + +static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_bind_addr cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_addr(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_addr cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, + cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_route(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_route cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + dev_addr = &route->addr.dev_addr; + memcpy(&resp->ib_route[0].dgid, ib_addr_get_dgid(dev_addr), + sizeof(union ib_gid)); + memcpy(&resp->ib_route[0].sgid, ib_addr_get_sgid(dev_addr), + sizeof(union ib_gid)); + resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + +static ssize_t ucma_query_route(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_query_route cmd; + struct rdma_ucm_query_route_resp resp; + struct ucma_context *ctx; + struct sockaddr *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + memset(&resp, 0, sizeof resp); + addr = &ctx->cm_id->route.addr.src_addr; + memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + addr = &ctx->cm_id->route.addr.dst_addr; + memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + if (!ctx->cm_id->device) + goto out; + + resp.node_guid = ctx->cm_id->device->node_guid; + resp.port_num = ctx->cm_id->port_num; + switch (ctx->cm_id->device->node_type) { + case IB_NODE_CA: + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + default: + break; + } + +out: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_conn_param(struct rdma_conn_param *dst_conn, + struct rdma_ucm_conn_param *src_conn) +{ + dst_conn->private_data = src_conn->private_data; + dst_conn->private_data_len = src_conn->private_data_len; + dst_conn->responder_resources =src_conn->responder_resources; + dst_conn->initiator_depth = src_conn->initiator_depth; + dst_conn->flow_control = src_conn->flow_control; + dst_conn->retry_count = src_conn->retry_count; + dst_conn->rnr_retry_count = src_conn->rnr_retry_count; + dst_conn->srq = src_conn->srq; + dst_conn->qp_num = src_conn->qp_num; + dst_conn->qp_type = src_conn->qp_type; +} + +static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_connect cmd; + struct rdma_conn_param conn_param; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + if (!cmd.conn_param.valid) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ret = rdma_connect(ctx->cm_id, &conn_param); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_listen cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ? + cmd.backlog : UCMA_MAX_BACKLOG; + ret = rdma_listen(ctx->cm_id, ctx->backlog); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_accept cmd; + struct rdma_conn_param conn_param; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (cmd.conn_param.valid) { + ctx->uid = cmd.uid; + ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ret = rdma_accept(ctx->cm_id, &conn_param); + } else + ret = rdma_accept(ctx->cm_id, NULL); + + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_reject cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_disconnect cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_disconnect(ctx->cm_id); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_init_qp_attr(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_init_qp_attr cmd; + struct ib_uverbs_qp_attr resp; + struct ucma_context *ctx; + struct ib_qp_attr qp_attr; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + resp.qp_attr_mask = 0; + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.qp_state = cmd.qp_state; + ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + if (ret) + goto out; + + ib_copy_qp_attr_to_user(&resp, &qp_attr); + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + +out: + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) = { + [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, + [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, + [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr, + [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, + [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, + [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, + [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, + [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, + [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, + [RDMA_USER_CM_CMD_REJECT] = ucma_reject, + [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, + [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, + [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event +}; + +static ssize_t ucma_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct ucma_file *file = filp->private_data; + struct rdma_ucm_cmd_hdr hdr; + ssize_t ret; + + if (len < sizeof(hdr)) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + + if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + return -EINVAL; + + if (hdr.in + sizeof(hdr) > len) + return -EINVAL; + + ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); + if (!ret) + ret = len; + + return ret; +} + +static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct ucma_file *file = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &file->poll_wait, wait); + + mutex_lock(&file->file_mutex); + if (!list_empty(&file->events)) + mask = POLLIN | POLLRDNORM; + mutex_unlock(&file->file_mutex); + + return mask; +} + +static int ucma_open(struct inode *inode, struct file *filp) +{ + struct ucma_file *file; + + file = kmalloc(sizeof *file, GFP_KERNEL); + if (!file) + return -ENOMEM; + + INIT_LIST_HEAD(&file->events); + INIT_LIST_HEAD(&file->ctxs); + init_waitqueue_head(&file->poll_wait); + mutex_init(&file->file_mutex); + + filp->private_data = file; + file->filp = filp; + return 0; +} + +static int ucma_close(struct inode *inode, struct file *filp) +{ + struct ucma_file *file = filp->private_data; + struct ucma_context *ctx; + + mutex_lock(&file->file_mutex); + while (!list_empty(&file->ctxs)) { + ctx = list_entry(file->ctxs.next, struct ucma_context, + file_list); + mutex_unlock(&file->file_mutex); + + mutex_lock(&ctx_mutex); + idr_remove(&ctx_idr, ctx->id); + mutex_unlock(&ctx_mutex); + + rdma_destroy_id(ctx->cm_id); + ucma_cleanup_events(ctx); + kfree(ctx); + + mutex_lock(&file->file_mutex); + } + mutex_unlock(&file->file_mutex); + kfree(file); + return 0; +} + +static struct file_operations ucma_fops = { + .owner = THIS_MODULE, + .open = ucma_open, + .release = ucma_close, + .write = ucma_write, + .poll = ucma_poll, +}; + +static struct miscdevice ucma_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "rdma_cm", + .fops = &ucma_fops, +}; + +static ssize_t show_abi_version(struct class_device *class_dev, char *buf) +{ + return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); +} +static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ucma_init(void) +{ + int ret; + + ret = misc_register(&ucma_misc); + if (ret) + return ret; + + ret = class_device_create_file(ucma_misc.class, + &class_device_attr_abi_version); + if (ret) { + printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); + goto err; + } + return 0; +err: + misc_deregister(&ucma_misc); + return ret; +} + +static void __exit ucma_cleanup(void) +{ + class_device_remove_file(ucma_misc.class, + &class_device_attr_abi_version); + misc_deregister(&ucma_misc); + idr_destroy(&ctx_idr); +} + +module_init(ucma_init); +module_exit(ucma_cleanup); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 76bf61e..a908a7b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1530,7 +1530,6 @@ ssize_t ib_uverbs_post_send(struct ib_uv out_put: put_qp_read(qp); -out: while (wr) { if (is_ud && wr->wr.ud.ah) put_ah_read(wr->wr.ud.ah); @@ -1539,6 +1538,7 @@ out: wr = next; } +out: kfree(user_wr); return ret ? ret : in_len; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5ec2d49..dc1f4de 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -188,7 +188,6 @@ static int ib_uverbs_cleanup_ucontext(st idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); - list_del(&uobj->list); kfree(uobj); } @@ -200,7 +199,6 @@ static int ib_uverbs_cleanup_ucontext(st idr_remove_uobj(&ib_uverbs_qp_idr, uobj); ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); - list_del(&uobj->list); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } @@ -213,7 +211,6 @@ static int ib_uverbs_cleanup_ucontext(st idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); - list_del(&uobj->list); ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } @@ -225,7 +222,6 @@ static int ib_uverbs_cleanup_ucontext(st idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); - list_del(&uobj->list); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } @@ -243,7 +239,6 @@ static int ib_uverbs_cleanup_ucontext(st memobj = container_of(uobj, struct ib_umem_object, uobject); ib_umem_release_on_close(mrdev, &memobj->umem); - list_del(&uobj->list); kfree(memobj); } @@ -252,7 +247,6 @@ static int ib_uverbs_cleanup_ucontext(st idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); - list_del(&uobj->list); kfree(uobj); } diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig new file mode 100644 index 0000000..925ec7d --- /dev/null +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -0,0 +1,13 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS && INFINIBAND + ---help--- + This is a low level device driver for the IBM GX based Host channel + adapters (HCAs). + +config INFINIBAND_EHCA_SCALING + bool "Scaling support (EXPERIMENTAL)" + depends on IBMEBUS && INFINIBAND_EHCA && EXPERIMENTAL && !HOTPLUG_CPU + ---help--- + eHCA scaling support schedules the CQ callbacks to different CPUs + in thread context. diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile new file mode 100644 index 0000000..ee19ce3 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick +# Christoph Raisch +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. + +obj-$(CONFIG_INFINIBAND_EHCA) += hcad_mod.o + +hcad_mod-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o hcp_if.o hcp_phyp.o ipz_pt_fn.o + +CFLAGS += -DEHCA_USE_HCALL -DEHCA_USE_HCALL_KERNEL diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c new file mode 100644 index 0000000..fd9fc6d --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -0,0 +1,303 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * adress vector functions + * + * Authors: Hoang-Nam Nguyen + * Khadija Souissi + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#define DEB_PREFIX "ehav" + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + extern struct ehca_module ehca_module; + extern int ehca_static_rate; + int ret = 0; + struct ehca_av *av = NULL; + struct ehca_shca *shca = NULL; + + EHCA_CHECK_PD_P(pd); + EHCA_CHECK_ADR_P(ah_attr); + + shca = container_of(pd->device, struct ehca_shca, ib_device); + + EDEB_EN(7, "pd=%p ah_attr=%p", pd, ah_attr); + + av = kmem_cache_alloc(ehca_module.cache_av, SLAB_KERNEL); + if (!av) { + EDEB_ERR(4, "Out of memory pd=%p ah_attr=%p", pd, ah_attr); + ret = -ENOMEM; + goto create_ah_exit0; + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + int ah_mult = ib_rate_to_mult(ah_attr->static_rate); + int ehca_mult = + ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); + + if (ah_mult >= ehca_mult) + av->av.ipd = 0; + else + av->av.ipd = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + } else + av->av.ipd = ehca_static_rate; + + EDEB(7, "IPD av->av.ipd set =%x ah_attr->static_rate=%x " + "shca_ib_rate=%x ",av->av.ipd, ah_attr->static_rate, + shca->sport[ah_attr->port_num].rate); + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc = 0; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + EDEB_ERR(4, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + EDEB_ERR(4, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + /* for the time being we use a hard coded PMTU of 2048 Bytes */ + av->av.pmtu = 4; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + EHCA_REGISTER_AV(device, pd); + + EDEB_EX(7, "pd=%p ah_attr=%p av=%p", pd, ah_attr, av); + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(ehca_module.cache_av, av); + +create_ah_exit0: + EDEB_EX(7, "ret=%x pd=%p ah_attr=%p", ret, pd, ah_attr); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = NULL; + struct ehca_ud_av new_ehca_av; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + int ret = 0; + + EHCA_CHECK_AV(ah); + EHCA_CHECK_ADR(ah_attr); + + EDEB_EN(7, "ah=%p ah_attr=%p", ah, ah_attr); + + my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc = 0; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + EDEB_ERR(4, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + goto modify_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + EDEB_ERR(4, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + goto modify_ah_exit1; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = 4; /* see also comment in create_ah() */ + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + +modify_ah_exit1: + EDEB_EX(7, "ret=%x ah=%p ah_attr=%p", ret, ah, ah_attr); + + return ret; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + int ret = 0; + struct ehca_av *av = NULL; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + + EHCA_CHECK_AV(ah); + EHCA_CHECK_ADR(ah_attr); + + EDEB_EN(7, "ah=%p ah_attr=%p", ah, ah_attr); + + my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + av = container_of(ah, struct ehca_av, ib_ah); + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + EDEB_EX(7, "ah=%p ah_attr=%p ret=%x", ah, ah_attr, ret); + return ret; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + extern struct ehca_module ehca_module; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + int ret = 0; + + EHCA_CHECK_AV(ah); + EHCA_DEREGISTER_AV(ah); + + EDEB_EN(7, "ah=%p", ah); + + my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + kmem_cache_free(ehca_module.cache_av, + container_of(ah, struct ehca_av, ib_ah)); + + EDEB_EX(7, "ret=%x ah=%p", ret, ah); + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h new file mode 100644 index 0000000..1a87bee --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -0,0 +1,343 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +#include "ehca_classes.h" +#include "ipz_pt_fn.h" + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif + +#include +#include + +#include "ehca_irq.h" + +struct ehca_module { + struct list_head shca_list; + spinlock_t shca_lock; + struct timer_list timer; + kmem_cache_t *cache_pd; + kmem_cache_t *cache_cq; + kmem_cache_t *cache_qp; + kmem_cache_t *cache_av; + kmem_cache_t *cache_mr; + kmem_cache_t *cache_mw; +}; + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_aqp1; + enum ib_rate rate; + enum ib_port_state port_state; +}; + +struct ehca_shca { + struct ib_device ib_device; + struct ibmebus_dev *ibmebus_dev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + u32 ownpid; +}; + +struct ehca_qp { + struct ib_qp ib_qp; + u32 qp_type; + struct ipz_queue ipz_squeue; + struct ipz_queue ipz_rqueue; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + u64 uspace_squeue; + u64 uspace_rqueue; + u64 uspace_fwh; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; +}; + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + u64 uspace_queue; + u64 uspace_fwh; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; + spinlock_t task_lock; + u32 ownpid; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_pages; /* number of MR pages */ + u32 num_4k; /* number of 4k "page" portions to form MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; + /* data for userspace bridge */ + u32 nr_of_pages; + void *pagearray; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_pages; + u64 page_cnt; + u64 num_4k; /* number of 4k "page" portions */ + u64 page_4k_cnt; /* counter for 4k "page" portions */ + u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ + + /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + + /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + + /* type EHCA_MR_PGI_FMR section */ + u64 *page_list; + u64 next_listelem; + /* next_4k also used within EHCA_MR_PGI_FMR */ +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +struct ehca_module *ehca_module_new(void); + +int ehca_module_delete(struct ehca_module *me); + +int ehca_eq_ctor(struct ehca_eq *eq); + +int ehca_eq_dtor(struct ehca_eq *eq); + +struct ehca_shca *ehca_shca_new(void); + +int ehca_shca_delete(struct ehca_shca *me); + +struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); + +extern spinlock_t ehca_qp_idr_lock; +extern spinlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; + +struct ipzu_queue_resp { + u64 queue; /* points to first queue entry */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 dummy; /* padding for 8 byte alignment */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + struct h_galpas galpas; +}; + +struct ehca_create_qp_resp { + u32 qp_num; + u32 token; + u32 qp_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 dummy; /* padding for 8 byte alignment */ + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; + struct h_galpas galpas; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +struct ehca_alloc_qp_parms { + int servicetype; + int sigtype; + int daqp_ctrl; + int max_send_sge; + int max_recv_sge; + int ud_av_l_key_ctl; + + u16 act_nr_send_wqes; + u16 act_nr_recv_wqes; + u8 act_nr_recv_sges; + u8 act_nr_send_sges; + + u32 nr_rq_pages; + u32 nr_sq_pages; + + struct ipz_eq_handle ipz_eq_handle; + struct ipz_pd pd; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h new file mode 100644 index 0000000..5665f21 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -0,0 +1,236 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) +#define MQPCB_DLID EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) +#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) +#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) +#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c new file mode 100644 index 0000000..c52d1c3 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -0,0 +1,449 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "e_cq" + +#include + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long spl_flags = 0; + + spin_lock_irqsave(&cq->spinlock, spl_flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, spl_flags); + + EDEB(7, "cq_num=%x real_qp_num=%x", cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter = NULL; + struct ehca_qp *qp = NULL; + unsigned long spl_flags = 0; + + spin_lock_irqsave(&cq->spinlock, spl_flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + EDEB(7, "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, spl_flags); + if (ret) { + EDEB_ERR(4, "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + } + + return ret; +} + +struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter = NULL; + struct ehca_qp *qp = NULL; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + extern struct ehca_module ehca_module; + struct ib_cq *cq = NULL; + struct ehca_cq *my_cq = NULL; + struct ehca_shca *shca = NULL; + struct ipz_adapter_handle adapter_handle; + /* h_call's out parameters */ + struct ehca_alloc_cq_parms param; + u32 counter = 0; + void *vpage = NULL; + u64 rpage = 0; + struct h_galpa gal; + u64 cqx_fec = 0; + u64 h_ret = 0; + int ipz_rc = 0; + int ret = 0; + const u32 additional_cqe=20; + int i= 0; + unsigned long flags; + + EHCA_CHECK_DEVICE_P(device); + EDEB_EN(7, "device=%p cqe=%x context=%p", device, cqe, context); + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + my_cq = kmem_cache_alloc(ehca_module.cache_cq, SLAB_KERNEL); + if (!my_cq) { + cq = ERR_PTR(-ENOMEM); + EDEB_ERR(4, "Out of memory for ehca_cq struct device=%p", + device); + goto create_cq_exit0; + } + + memset(my_cq, 0, sizeof(struct ehca_cq)); + memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); + + spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + my_cq->ownpid = current->tgid; + + cq = &my_cq->ib_cq; + + shca = container_of(device, struct ehca_shca, ib_device); + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + + do { + if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { + cq = ERR_PTR(-ENOMEM); + EDEB_ERR(4, + "Can't reserve idr resources. " + "device=%p", device); + goto create_cq_exit1; + } + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + } while (ret == -EAGAIN); + + if (ret) { + cq = ERR_PTR(-ENOMEM); + EDEB_ERR(4, + "Can't allocate new idr entry. " + "device=%p", device); + goto create_cq_exit1; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs. + */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); + + if (h_ret != H_SUCCESS) { + EDEB_ERR(4,"hipz_h_alloc_resource_cq() failed " + "h_ret=%lx device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0); + if (!ipz_rc) { + EDEB_ERR(4, + "ipz_queue_ctor() failed " + "ipz_rc=%x device=%p", ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + EDEB_ERR(4, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. + kernel); + + if (h_ret < H_SUCCESS) { + EDEB_ERR(4, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lx " + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + EDEB_ERR(4, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lx", + my_cq, my_cq->cq_number, h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + EDEB_ERR(4, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lx" + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); + EDEB(8, "ehca_cq=%p cq_num=%x CQX_FEC=%lx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + struct vm_area_struct *vma = NULL; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, + ipz_queue->queue_length, + (void**)&resp.ipz_queue.queue, + &vma); + if (ret) { + EDEB_ERR(4, "Could not mmap queue pages"); + cq = ERR_PTR(ret); + goto create_cq_exit4; + } + my_cq->uspace_queue = resp.ipz_queue.queue; + resp.galpas = my_cq->galpas; + ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, + (void**)&resp.galpas.kernel.fw_handle, + &vma); + if (ret) { + EDEB_ERR(4, "Could not mmap fw_handle"); + cq = ERR_PTR(ret); + goto create_cq_exit5; + } + my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + EDEB_ERR(4, "Copy to udata failed."); + goto create_cq_exit6; + } + } + + EDEB_EX(7,"retcode=%p ehca_cq=%p cq_num=%x cq_size=%x", + cq, my_cq, my_cq->cq_number, param.act_nr_of_entries); + return cq; + +create_cq_exit6: + ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); + +create_cq_exit5: + ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length); + +create_cq_exit4: + ipz_queue_dtor(&my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + EDEB(4, "hipz_h_destroy_cq() failed ehca_cq=%p cq_num=%x " + "h_ret=%lx", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(ehca_module.cache_cq, my_cq); + +create_cq_exit0: + EDEB_EX(4, "An error has occured retcode=%p", cq); + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + extern struct ehca_module ehca_module; + u64 h_ret = 0; + int ret = 0; + struct ehca_cq *my_cq = NULL; + int cq_num = 0; + struct ib_device *device = NULL; + struct ehca_shca *shca = NULL; + struct ipz_adapter_handle adapter_handle; + u32 cur_pid = current->tgid; + unsigned long flags; + + EHCA_CHECK_CQ(cq); + my_cq = container_of(cq, struct ehca_cq, ib_cq); + cq_num = my_cq->cq_number; + device = cq->device; + EHCA_CHECK_DEVICE(device); + shca = container_of(device, struct ehca_shca, ib_device); + adapter_handle = shca->ipz_hca_handle; + EDEB_EN(7, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + while (my_cq->nr_callbacks) + yield(); + + idr_remove(&ehca_cq_idr, my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_cq->ownpid); + return -EINVAL; + } + + /* un-mmap if vma alloc */ + if (my_cq->uspace_queue ) { + ret = ehca_munmap(my_cq->uspace_queue, + my_cq->ipz_queue.queue_length); + ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); + } + + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + EDEB(4, "ehca_cq=%p cq_num=%x ressource=%lx in err state. " + "Try to delete it forcibly.", + my_cq, my_cq->cq_number, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + EDEB(4, "ehca_cq=%p cq_num=%x deleted successfully.", + my_cq, my_cq->cq_number); + } + if (h_ret != H_SUCCESS) { + EDEB_ERR(4,"hipz_h_destroy_cq() failed " + "h_ret=%lx ehca_cq=%p cq_num=%x", + h_ret, my_cq, my_cq->cq_number); + ret = ehca2ib_return_code(h_ret); + goto destroy_cq_exit0; + } + ipz_queue_dtor(&my_cq->ipz_queue); + kmem_cache_free(ehca_module.cache_cq, my_cq); + +destroy_cq_exit0: + EDEB_EX(7, "ehca_cq=%p cq_num=%x ret=%x ", + my_cq, cq_num, ret); + return ret; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + int ret = 0; + struct ehca_cq *my_cq = NULL; + u32 cur_pid = current->tgid; + + if (unlikely(!cq)) { + EDEB_ERR(4, "cq is NULL"); + return -EFAULT; + } + + my_cq = container_of(cq, struct ehca_cq, ib_cq); + EDEB_EN(7, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + + if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_cq->ownpid); + return -EINVAL; + } + + /* TODO: proper resize needs to be done */ + ret = -EFAULT; + EDEB_ERR(4, "not implemented yet"); + + EDEB_EX(7, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c new file mode 100644 index 0000000..080ed02 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -0,0 +1,222 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "e_eq" + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + u64 ret = H_SUCCESS; + u32 nr_pages = 0; + u32 i; + void *vpage = NULL; + + EDEB_EN(7, "shca=%p eq=%p length=%x", shca, eq, length); + EHCA_CHECK_ADR(shca); + EHCA_CHECK_ADR(eq); + + spin_lock_init(&eq->spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + EDEB_ERR(4, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (length == 0) { + EDEB_ERR(4, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (ret != H_SUCCESS) { + EDEB_ERR(4, "Can't allocate EQ / NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0); + if (!ret) { + EDEB_ERR(4, "Can't allocate EQ pages. eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + ret = H_RESOURCE; + goto create_eq_exit2; + } + + rpage = virt_to_abs(vpage); + ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (ret != H_PAGE_REGISTERED || !vpage) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq, + SA_INTERRUPT, "ehca_eq", + (void *)shca); + if (ret < 0) + EDEB_ERR(4, "Can't map interrupt handler."); + + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + } else if (type == EHCA_NEQ) { + ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq, + SA_INTERRUPT, "ehca_neq", + (void *)shca); + if (ret < 0) + EDEB_ERR(4, "Can't map interrupt handler."); + + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + } + + eq->is_initialized = 1; + + EDEB_EX(7, "ret=%lx", ret); + + return 0; + +create_eq_exit2: + ipz_queue_dtor(&eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + EDEB_EX(7, "ret=%lx", ret); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags = 0; + void *eqe = NULL; + + EDEB_EN(7, "shca=%p eq=%p", shca, eq); + EHCA_CHECK_ADR_P(shca); + EHCA_CHECK_EQ_P(eq); + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + EDEB_EX(7, "eq=%p eqe=%p", eq, eqe); + + return eqe; +} + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + struct ehca_module *module = (struct ehca_module*)data; + + spin_lock(&module->shca_lock); + list_for_each_entry(shca, &module->shca_list, shca_list) { + if (shca->eq.is_initialized) + ehca_tasklet_eq((unsigned long)(void*)shca); + } + mod_timer(&module->timer, jiffies + HZ); + spin_unlock(&module->shca_lock); + + return; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags = 0; + u64 h_ret = H_SUCCESS; + + EDEB_EN(7, "shca=%p eq=%p", shca, eq); + EHCA_CHECK_ADR(shca); + EHCA_CHECK_EQ(eq); + + spin_lock_irqsave(&eq->spinlock, flags); + ibmebus_free_irq(NULL, eq->ist, (void *)shca); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + spin_unlock_irqrestore(&eq->spinlock, flags); + + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(&eq->ipz_queue); + + EDEB_EX(7, "h_ret=%lx", h_ret); + + return h_ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c new file mode 100644 index 0000000..7a871b2 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -0,0 +1,282 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HCA query functions + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#undef DEB_PREFIX +#define DEB_PREFIX "shca" + +#include "ehca_tools.h" + +#include "hcp_if.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_hca *rblock; + + EDEB_EN(7, ""); + + memset(props, 0, sizeof(struct ib_device_attr)); + shca = container_of(ibdev, struct ehca_shca, ib_device); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto query_device0; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + EDEB_ERR(4, "Can't query device properties"); + ret = -EINVAL; + goto query_device1; + } + props->fw_ver = rblock->hw_ver; + props->max_mr_size = rblock->max_mr_size; + props->vendor_id = rblock->vendor_id >> 8; + props->vendor_part_id = rblock->vendor_part_id >> 16; + props->hw_ver = rblock->hw_ver; + props->max_qp = min_t(int, rblock->max_qp, INT_MAX); + props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX); + props->max_sge = min_t(int, rblock->max_sge, INT_MAX); + props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX); + props->max_cq = min_t(int, rblock->max_cq, INT_MAX); + props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX); + props->max_mr = min_t(int, rblock->max_mr, INT_MAX); + props->max_mw = min_t(int, rblock->max_mw, INT_MAX); + props->max_pd = min_t(int, rblock->max_pd, INT_MAX); + props->max_ah = min_t(int, rblock->max_ah, INT_MAX); + props->max_fmr = min_t(int, rblock->max_mr, INT_MAX); + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_pkeys = 16; + props->local_ca_ack_delay + = rblock->local_ca_ack_delay; + props->max_raw_ipv6_qp + = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX); + props->max_raw_ethy_qp + = min_t(int, rblock->max_raw_ethy_qp, INT_MAX); + props->max_mcast_grp + = min_t(int, rblock->max_mcast_grp, INT_MAX); + props->max_mcast_qp_attach + = min_t(int, rblock->max_mcast_qp_attach, INT_MAX); + props->max_total_mcast_qp_attach + = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + +query_device1: + kfree(rblock); + +query_device0: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + EDEB_EN(7, "port=%x", port); + + memset(props, 0, sizeof(struct ib_port_attr)); + shca = container_of(ibdev, struct ehca_shca, ib_device); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto query_port0; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + EDEB_ERR(4, "Can't query port properties"); + ret = -EINVAL; + goto query_port1; + } + + props->state = rblock->state; + + switch (rblock->max_mtu) { + case 0x1: + props->active_mtu = props->max_mtu = IB_MTU_256; + break; + case 0x2: + props->active_mtu = props->max_mtu = IB_MTU_512; + break; + case 0x3: + props->active_mtu = props->max_mtu = IB_MTU_1024; + break; + case 0x4: + props->active_mtu = props->max_mtu = IB_MTU_2048; + break; + case 0x5: + props->active_mtu = props->max_mtu = IB_MTU_4096; + break; + default: + EDEB_ERR(4, "Unknown MTU size: %x.", rblock->max_mtu); + } + + props->gid_tbl_len = rblock->gid_tbl_len; + props->max_msg_sz = rblock->max_msg_sz; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + +query_port1: + kfree(rblock); + +query_port0: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + EDEB_EN(7, "port=%x index=%x", port, index); + + if (index > 16) { + EDEB_ERR(4, "Invalid index: %x.", index); + ret = -EINVAL; + goto query_pkey0; + } + + shca = container_of(ibdev, struct ehca_shca, ib_device); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto query_pkey0; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + EDEB_ERR(4, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); + +query_pkey1: + kfree(rblock); + +query_pkey0: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + EDEB_EN(7, "port=%x index=%x", port, index); + + if (index > 255) { + EDEB_ERR(4, "Invalid index: %x.", index); + ret = -EINVAL; + goto query_gid0; + } + + shca = container_of(ibdev, struct ehca_shca, ib_device); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto query_gid0; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + EDEB_ERR(4, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + kfree(rblock); + +query_gid0: + EDEB_EX(7, "ret=%x GID=%lx%lx", ret, + *(u64 *) & gid->raw[0], + *(u64 *) & gid->raw[8]); + + return ret; +} + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + int ret = 0; + + EDEB_EN(7, "port=%x", port); + + /* Not implemented yet. */ + + EDEB_EX(7, "ret=%x", ret); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c new file mode 100644 index 0000000..478a288 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -0,0 +1,736 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "eirq" + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + EDEB_EN(7, "cq=%p", cq); + + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + EDEB_EX(7, "cq=%p", cq); + + return; +} + +static void print_error_data(struct ehca_shca * shca, void* data, + u64* rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + EDEB_EN(7, "shca=%p data=%p rblock=%p length=%x", + shca, data, rblock, length); + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp*)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + EDEB_ERR(4, "QP 0x%x (resource=%lx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq*)data; + + EDEB_ERR(4, "CQ 0x%x (resource=%lx) has errors.", + cq->cq_number, resource); + break; + } + default: + EDEB_ERR(4, "Unknown errror type: %lx on %s.", + type, shca->ib_device.name); + break; + } + + EDEB_ERR(4, "Error data is available: %lx.", resource); + EDEB_ERR(4, "EHCA ----- error data begin " + "---------------------------------------------------"); + EDEB_DMP(4, rblock, length, "resource=%lx", resource); + EDEB_ERR(4, "EHCA ----- error data end " + "----------------------------------------------------"); + + EDEB_EX(7, ""); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret = 0; + u64 *rblock; + unsigned long block_count; + + EDEB_EN(7, "shca=%p data=%p resource=%lx", shca, data, resource); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) { + EDEB_ERR(4, "No error data is available: %lx.", resource); + } + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > PAGE_SIZE) + length = PAGE_SIZE; + + print_error_data(shca, data, rblock, length); + } + else { + EDEB_ERR(4, "Error data could not be fetched: %lx", resource); + } + + kfree(rblock); + +error_data1: + return ret; + +} + +static void qp_event_callback(struct ehca_shca *shca, + u64 eqe, + enum ib_event_type event_type) +{ + struct ib_event event; + struct ehca_qp *qp; + unsigned long flags; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + EDEB_EN(7, "eqe=%lx", eqe); + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + + if (!qp) + return; + + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + if (!qp->ib_qp.event_handler) + return; + + event.device = &shca->ib_device; + event.event = event_type; + event.element.qp = &qp->ib_qp; + + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + + EDEB_EX(7, "qp=%p", qp); + + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + unsigned long flags; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + EDEB_EN(7, "eqe=%lx", eqe); + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + EDEB_EX(7, "cq=%p", cq); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + EDEB_EN(7, "shca=%p eqe=%lx", shca, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + break; + case 0x07: /* CQ error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + EDEB_ERR(4, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + EDEB_ERR(4, "Port event."); + break; + case 0x0B: /* MR access error */ + EDEB_ERR(4, "MR access error."); + break; + case 0x0C: /* EQ error */ + EDEB_ERR(4, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + EDEB_ERR(4, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + EDEB_ERR(4, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + EDEB_ERR(4, "Unaffiliated access error."); + break; + case 0x12: /* path migrating error */ + EDEB_ERR(4, "Path migration error."); + break; + case 0x13: /* interface trace stopped */ + EDEB_ERR(4, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + default: + EDEB_ERR(4, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + EDEB_EX(7, "eqe=%lx identifier=%x", eqe, identifier); + + return; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + struct ib_event event; + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + + EDEB_EN(7, "shca=%p eqe=%lx", shca, eqe); + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + EDEB(4, "%s: port %x is active.", + shca->ib_device.name, port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ACTIVE; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + ib_dispatch_event(&event); + } else { + EDEB(4, "%s: port %x is inactive.", + shca->ib_device.name, port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + EDEB(4, "EHCA disruptive port %x " + "configuration change.", port); + + EDEB(4, "%s: port %x is inactive.", + shca->ib_device.name, port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + + EDEB(4, "%s: port %x is active.", + shca->ib_device.name, port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ACTIVE; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + ib_dispatch_event(&event); + break; + case 0x32: /* adapter malfunction */ + EDEB_ERR(4, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + EDEB_ERR(4, "Traced stopped."); + break; + default: + EDEB_ERR(4, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + EDEB_EN(7, "eqe=%lx ec=%x", eqe, ec); + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP = 0; + struct h_galpa gal = cq->galpas.kernel; + + EDEB_EN(7, "cq=%p", cq); + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + EDEB(7, "CQx_EP=%lx", CQx_EP); + + EDEB_EX(7, "cq=%p", cq); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + EDEB_EN(7, "dev_id=%p", dev_id); + + tasklet_hi_schedule(&shca->neq.interrupt_task); + + EDEB_EX(7, ""); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret = H_SUCCESS; + + EDEB_EN(7, "shca=%p", shca); + + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + EDEB_ERR(4, "Can't clear notification events."); + + EDEB_EX(7, "shca=%p", shca); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + EDEB_EN(7, "dev_id=%p", dev_id); + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + EDEB_EX(7, ""); + + return IRQ_HANDLED; +} + +void ehca_tasklet_eq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + int int_state; + int query_cnt = 0; + + EDEB_EN(7, "shca=%p", shca); + + do { + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + + if ((shca->hw_level >= 2) && eqe) + int_state = 1; + else + int_state = 0; + + while ((int_state == 1) || eqe) { + while (eqe) { + u64 eqe_value = eqe->entry; + + EDEB(7, "eqe_value=%lx", eqe_value); + + /* TODO: better structure */ + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, + eqe_value)) { +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + extern struct ehca_comp_pool* ehca_pool; +#endif + extern struct idr ehca_cq_idr; + unsigned long flags; + u32 token; + struct ehca_cq *cq; + + EDEB(6, "... completion event"); + token = + EHCA_BMASK_GET(EQE_CQ_TOKEN, + eqe_value); + spin_lock_irqsave(&ehca_cq_idr_lock, + flags); + cq = idr_find(&ehca_cq_idr, token); + + if (cq == NULL) { + spin_unlock(&ehca_cq_idr_lock); + break; + } + + reset_eq_pending(cq); +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + ehca_queue_comp_task(ehca_pool, cq); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); +#else + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); + comp_event_callback(cq); +#endif + } else { + EDEB(6, "... non completion event"); + parse_identifier(shca, eqe_value); + } + eqe = + (struct ehca_eqe *)ehca_poll_eq(shca, + &shca->eq); + } + + if (shca->hw_level >= 2) { + int_state = + hipz_h_query_int_state(shca->ipz_hca_handle, + shca->eq.ist); + query_cnt++; + iosync(); + if (query_cnt >= 100) { + query_cnt = 0; + int_state = 0; + } + } + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + + } + } while (int_state != 0); + + EDEB_EX(7, "shca=%p", shca); + + return; +} + +static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +{ + unsigned long flags_last_cpu; + + spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu); + pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map); + + if (pool->last_cpu == NR_CPUS) + pool->last_cpu = 0; + + spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu); + + return pool->last_cpu; +} + +void ehca_queue_comp_task(struct ehca_comp_pool *pool, struct ehca_cq *__cq) +{ + int cpu; + int cpu_id; + struct ehca_cpu_comp_task *cct; + unsigned long flags_cct; + unsigned long flags_cq; + + cpu = get_cpu(); + cpu_id = find_next_online_cpu(pool); + + EDEB_EN(7, "pool=%p cq=%p cq_nr=%x CPU=%x:%x:%x:%x", + pool, __cq, __cq->cq_number, + cpu, cpu_id, num_online_cpus(), num_possible_cpus()); + + BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + + if (cct->cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + } + + spin_lock_irqsave(&cct->task_lock, flags_cct); + spin_lock_irqsave(&__cq->task_lock, flags_cq); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up(&cct->wait_queue); + } + else + __cq->nr_callbacks++; + + spin_unlock_irqrestore(&__cq->task_lock, flags_cq); + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + put_cpu(); + + EDEB_EX(7, "cct=%p", cct); + + return; +} + +static void run_comp_task(struct ehca_cpu_comp_task* cct) +{ + struct ehca_cq *cq = NULL; + unsigned long flags_cct; + unsigned long flags_cq; + + + EDEB_EN(7, "cct=%p", cct); + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + comp_event_callback(cq); + spin_lock_irqsave(&cct->task_lock, flags_cct); + + spin_lock_irqsave(&cq->task_lock, flags_cq); + cq->nr_callbacks--; + if (cq->nr_callbacks == 0) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock_irqrestore(&cq->task_lock, flags_cq); + + } + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + EDEB_EX(7, "cct=%p cq=%p", cct, cq); + + return; +} + +static int comp_task(void *__cct) +{ + struct ehca_cpu_comp_task* cct = __cct; + DECLARE_WAITQUEUE(wait, current); + + EDEB_EN(7, "cct=%p", cct); + + set_current_state(TASK_INTERRUPTIBLE); + while(!kthread_should_stop()) { + add_wait_queue(&cct->wait_queue, &wait); + + if (list_empty(&cct->cq_list)) + schedule(); + else + __set_current_state(TASK_RUNNING); + + remove_wait_queue(&cct->wait_queue, &wait); + + if (!list_empty(&cct->cq_list)) + run_comp_task(__cct); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + EDEB_EX(7, ""); + + return 0; +} + +static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + + EDEB_EN(7, "cpu=%d:%d", cpu, NR_CPUS); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + init_waitqueue_head(&cct->wait_queue); + cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); + + EDEB_EX(7, "cct/%d=%p", cpu, cct); + + return cct->task; +} + +static void destroy_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + struct task_struct *task; + + EDEB_EN(7, "pool=%p cpu=%d:%d", pool, cpu, NR_CPUS); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + cct->task = NULL; + cct->cq_jobs = 0; + task = cct->task; + + if (task) + kthread_stop(task); + + EDEB_EX(7, ""); + + return; +} + +struct ehca_comp_pool *ehca_create_comp_pool(void) +{ + struct ehca_comp_pool *pool; + int cpu; + struct task_struct *task; + + EDEB_EN(7, ""); + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return NULL; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = any_online_cpu(cpu_online_map); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (pool->cpu_comp_tasks == NULL) { + kfree(pool); + return NULL; + } + + for_each_online_cpu(cpu) { + task = create_comp_task(pool, cpu); + if (task) { + kthread_bind(task, cpu); + wake_up_process(task); + } + } + + EDEB_EX(7, "pool=%p", pool); + + return pool; +} + +void ehca_destroy_comp_pool(struct ehca_comp_pool *pool) +{ + int i; + + EDEB_EN(7, "pool=%p", pool); + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_online(i)) + destroy_comp_task(pool, i); + } + + EDEB_EN(7, ""); + + return; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h new file mode 100644 index 0000000..7aa71eb --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -0,0 +1,78 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include +#include +#include + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs); +void ehca_tasklet_eq(unsigned long data); + +struct ehca_cpu_comp_task { + wait_queue_head_t wait_queue; + struct list_head cq_list; + struct task_struct *task; + spinlock_t task_lock; + int cq_jobs; +}; + +struct ehca_comp_pool { + struct ehca_cpu_comp_task *cpu_comp_tasks; + int last_cpu; + spinlock_t last_cpu_lock; +}; + +struct ehca_comp_pool *ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(struct ehca_comp_pool *pool); +void ehca_queue_comp_task(struct ehca_comp_pool *pool, struct ehca_cq *__cq); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h new file mode 100644 index 0000000..bbdc437 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -0,0 +1,181 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick + * Dietmar Decker + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +void ehca_poll_eqs(unsigned long data); + +int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, + struct vm_area_struct **vma); + +int ehca_mmap_register(u64 physical,void **mapped, + struct vm_area_struct **vma); + +int ehca_munmap(unsigned long addr, size_t len); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c new file mode 100644 index 0000000..78e8596 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -0,0 +1,964 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "shca" + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Christoph Raisch "); +MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); +MODULE_VERSION("SVNEHCA_0008_GIT1"); + +struct ehca_comp_pool* ehca_pool; + +int ehca_open_aqp1 = 0; +int ehca_debug_level = -1; +int ehca_hw_level = 0; +int ehca_nr_ports = 2; +int ehca_use_hp_mr = 0; +int ehca_port_act_time = 30; +int ehca_poll_all_eqs = 1; +int ehca_static_rate = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, int, 0); +module_param_named(debug_level, ehca_debug_level, int, 0); +module_param_named(hw_level, ehca_hw_level, int, 0); +module_param_named(nr_ports, ehca_nr_ports, int, 0); +module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); +module_param_named(port_act_time, ehca_port_act_time, int, 0); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); +module_param_named(static_rate, ehca_static_rate, int, 0); + +MODULE_PARM_DESC(open_aqp1, + "AQP1 on startup (0: no (default), 1: yes)"); +MODULE_PARM_DESC(debug_level, + "debug level" + " (0: node, 6: only errors (default), 9: all)"); +MODULE_PARM_DESC(hw_level, + "hardware level" + " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); +MODULE_PARM_DESC(nr_ports, + "number of connected ports (default: 2)"); +MODULE_PARM_DESC(use_hp_mr, + "high performance MRs (0: no (default), 1: yes)"); +MODULE_PARM_DESC(port_act_time, + "time to wait for port activation (default: 30 sec)"); +MODULE_PARM_DESC(poll_all_eqs, + "polls all event queues periodically" + " (0: no, 1: yes (default))"); +MODULE_PARM_DESC(static_rate, + "set permanent static rate (default: disabled)"); + +/* + * This external trace mask controls what will end up in the + * kernel ring buffer. Number 6 means, that everything between + * 0 and 5 will be stored. + */ +u8 ehca_edeb_mask[EHCA_EDEB_TRACE_MASK_SIZE]={6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 0, 0}; + +spinlock_t ehca_qp_idr_lock; +spinlock_t ehca_cq_idr_lock; +DEFINE_IDR(ehca_qp_idr); +DEFINE_IDR(ehca_cq_idr); + +struct ehca_module ehca_module; + +void ehca_init_trace(void) +{ + EDEB_EN(7, ""); + + if (ehca_debug_level != -1) { + int i; + for (i = 0; i < EHCA_EDEB_TRACE_MASK_SIZE; i++) + ehca_edeb_mask[i] = ehca_debug_level; + } + + EDEB_EX(7, ""); +} + +int ehca_create_slab_caches(struct ehca_module *ehca_module) +{ + int ret = 0; + + EDEB_EN(7, ""); + + ehca_module->cache_pd = + kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ehca_module->cache_pd) { + EDEB_ERR(4, "Cannot create PD SLAB cache."); + ret = -ENOMEM; + goto create_slab_caches1; + } + + ehca_module->cache_cq = + kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ehca_module->cache_cq) { + EDEB_ERR(4, "Cannot create CQ SLAB cache."); + ret = -ENOMEM; + goto create_slab_caches2; + } + + ehca_module->cache_qp = + kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ehca_module->cache_qp) { + EDEB_ERR(4, "Cannot create QP SLAB cache."); + ret = -ENOMEM; + goto create_slab_caches3; + } + + ehca_module->cache_av = + kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ehca_module->cache_av) { + EDEB_ERR(4, "Cannot create AV SLAB cache."); + ret = -ENOMEM; + goto create_slab_caches4; + } + + ehca_module->cache_mw = + kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ehca_module->cache_mw) { + EDEB_ERR(4, "Cannot create MW SLAB cache."); + ret = -ENOMEM; + goto create_slab_caches5; + } + + ehca_module->cache_mr = + kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ehca_module->cache_mr) { + EDEB_ERR(4, "Cannot create MR SLAB cache."); + ret = -ENOMEM; + goto create_slab_caches6; + } + + EDEB_EX(7, "ret=%x", ret); + + return ret; + +create_slab_caches6: + kmem_cache_destroy(ehca_module->cache_mw); + +create_slab_caches5: + kmem_cache_destroy(ehca_module->cache_av); + +create_slab_caches4: + kmem_cache_destroy(ehca_module->cache_qp); + +create_slab_caches3: + kmem_cache_destroy(ehca_module->cache_cq); + +create_slab_caches2: + kmem_cache_destroy(ehca_module->cache_pd); + +create_slab_caches1: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +int ehca_destroy_slab_caches(struct ehca_module *ehca_module) +{ + int ret; + + EDEB_EN(7, ""); + + ret = kmem_cache_destroy(ehca_module->cache_pd); + if (ret) + EDEB_ERR(4, "Cannot destroy PD SLAB cache. ret=%x", ret); + + ret = kmem_cache_destroy(ehca_module->cache_cq); + if (ret) + EDEB_ERR(4, "Cannot destroy CQ SLAB cache. ret=%x", ret); + + ret = kmem_cache_destroy(ehca_module->cache_qp); + if (ret) + EDEB_ERR(4, "Cannot destroy QP SLAB cache. ret=%x", ret); + + ret = kmem_cache_destroy(ehca_module->cache_av); + if (ret) + EDEB_ERR(4, "Cannot destroy AV SLAB cache. ret=%x", ret); + + ret = kmem_cache_destroy(ehca_module->cache_mw); + if (ret) + EDEB_ERR(4, "Cannot destroy MW SLAB cache. ret=%x", ret); + + ret = kmem_cache_destroy(ehca_module->cache_mr); + if (ret) + EDEB_ERR(4, "Cannot destroy MR SLAB cache. ret=%x", ret); + + EDEB_EX(7, ""); + + return 0; +} + +#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) +#define EHCA_REVID EHCA_BMASK_IBM(40,63) + +int ehca_sense_attributes(struct ehca_shca *shca) +{ + int ret = -EINVAL; + u64 h_ret = H_SUCCESS; + struct hipz_query_hca *rblock; + + EDEB_EN(7, "shca=%p", shca); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto num_ports0; + } + + h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "Cannot query device properties. h_ret=%lx", h_ret); + ret = -EPERM; + goto num_ports1; + } + + if (ehca_nr_ports == 1) + shca->num_ports = 1; + else + shca->num_ports = (u8)rblock->num_ports; + + EDEB(6, " ... found %x ports", rblock->num_ports); + + if (ehca_hw_level == 0) { + u32 hcaaver; + u32 revid; + + hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); + revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); + + EDEB(6, " ... hardware version=%x:%x", + hcaaver, revid); + + if ((hcaaver == 1) && (revid == 0)) + shca->hw_level = 0; + else if ((hcaaver == 1) && (revid == 1)) + shca->hw_level = 1; + else if ((hcaaver == 1) && (revid == 2)) + shca->hw_level = 2; + } + EDEB(6, " ... hardware level=%x", shca->hw_level); + + shca->sport[0].rate = IB_RATE_30_GBPS; + shca->sport[1].rate = IB_RATE_30_GBPS; + + ret = 0; + +num_ports1: + kfree(rblock); + +num_ports0: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +static int init_node_guid(struct ehca_shca* shca) +{ + int ret = 0; + struct hipz_query_hca *rblock; + + EDEB_EN(7, ""); + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + EDEB_ERR(4, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto init_node_guid0; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + EDEB_ERR(4, "Can't query device properties"); + ret = -EINVAL; + goto init_node_guid1; + } + + memcpy(&shca->ib_device.node_guid, &rblock->node_guid, (sizeof(u64))); + +init_node_guid1: + kfree(rblock); + +init_node_guid0: + EDEB_EX(7, "node_guid=%lx ret=%x", shca->ib_device.node_guid, ret); + + return ret; +} + +int ehca_register_device(struct ehca_shca *shca) +{ + int ret = 0; + + EDEB_EN(7, "shca=%p", shca); + + ret = init_node_guid(shca); + if (ret) + return ret; + + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = IB_NODE_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp = ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + /* shca->ib_device.process_mad = ehca_process_mad; */ + shca->ib_device.mmap = ehca_mmap; + + ret = ib_register_device(&shca->ib_device); + + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport; + struct ib_cq *ibcq; + struct ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + int ret = 0; + + EDEB_EN(7, "shca=%p port=%x", shca, port); + + sport = &shca->sport[port - 1]; + + if (sport->ibcq_aqp1) { + EDEB_ERR(4, "AQP1 CQ is already created."); + return -EPERM; + } + + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10); + if (IS_ERR(ibcq)) { + EDEB_ERR(4, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_aqp1) { + EDEB_ERR(4, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + EDEB_ERR(4, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_aqp1 = ibqp; + + goto create_aqp0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + +create_aqp0: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret = 0; + + EDEB_EN(7, "sport=%p", sport); + + ret = ib_destroy_qp(sport->ibqp_aqp1); + if (ret) { + EDEB_ERR(4, "Cannot destroy AQP1 QP. ret=%x", ret); + goto destroy_aqp1; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + EDEB_ERR(4, "Cannot destroy AQP1 CQ. ret=%x", ret); + +destroy_aqp1: + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +static ssize_t ehca_show_debug_mask(struct device_driver *ddp, char *buf) +{ + int i; + int total = 0; + total += snprintf(buf + total, PAGE_SIZE - total, "%d", + ehca_edeb_mask[0]); + for (i = 1; i < EHCA_EDEB_TRACE_MASK_SIZE; i++) { + total += snprintf(buf + total, PAGE_SIZE - total, "%d", + ehca_edeb_mask[i]); + } + + total += snprintf(buf + total, PAGE_SIZE - total, "\n"); + + return total; +} + +static ssize_t ehca_store_debug_mask(struct device_driver *ddp, + const char *buf, size_t count) +{ + int i; + for (i = 0; i < EHCA_EDEB_TRACE_MASK_SIZE; i++) { + char value = buf[i] - '0'; + if ((value <= 9) && (count >= i)) { + ehca_edeb_mask[i] = value; + } + } + return count; +} +DRIVER_ATTR(debug_mask, S_IRUSR | S_IWUSR, + ehca_show_debug_mask, ehca_store_debug_mask); + +void ehca_create_driver_sysfs(struct ibmebus_driver *drv) +{ + driver_create_file(&drv->driver, &driver_attr_debug_mask); +} + +void ehca_remove_driver_sysfs(struct ibmebus_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_debug_mask); +} + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev->driver_data; \ + \ + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \ + if (!rblock) { \ + EDEB_ERR(4, "Can't allocate rblock memory."); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + EDEB_ERR(4, "Can't query device properties"); \ + kfree(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + kfree(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); +EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev->driver_data; + + return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + + + +void ehca_create_device_sysfs(struct ibmebus_dev *dev) +{ + device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle); + device_create_file(&dev->ofdev.dev, &dev_attr_num_ports); + device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver); + device_create_file(&dev->ofdev.dev, &dev_attr_max_eq); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq); + device_create_file(&dev->ofdev.dev, &dev_attr_max_cq); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq); + device_create_file(&dev->ofdev.dev, &dev_attr_max_qp); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp); + device_create_file(&dev->ofdev.dev, &dev_attr_max_mr); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr); + device_create_file(&dev->ofdev.dev, &dev_attr_max_mw); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw); + device_create_file(&dev->ofdev.dev, &dev_attr_max_pd); + device_create_file(&dev->ofdev.dev, &dev_attr_max_ah); +} + +void ehca_remove_device_sysfs(struct ibmebus_dev *dev) +{ + device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle); + device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports); + device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah); +} + +static int __devinit ehca_probe(struct ibmebus_dev *dev, + const struct of_device_id *id) +{ + struct ehca_shca *shca; + u64 *handle; + struct ib_pd *ibpd; + int ret = 0; + + EDEB_EN(7, "name=%s", dev->name); + + handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL); + if (!handle) { + EDEB_ERR(4, "Cannot get eHCA handle for adapter: %s.", + dev->ofdev.node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + EDEB_ERR(4, "Wrong eHCA handle for adapter: %s.", + dev->ofdev.node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (shca == NULL) { + EDEB_ERR(4, "Cannot allocate shca memory."); + return -ENOMEM; + } + + shca->ibmebus_dev = dev; + shca->ipz_hca_handle.handle = *handle; + dev->ofdev.dev.driver_data = shca; + + ret = ehca_sense_attributes(shca); + if (ret < 0) { + EDEB_ERR(4, "Cannot sense eHCA attributes."); + goto probe1; + } + + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + if (ret) { + EDEB_ERR(4, "Cannot create EQ."); + goto probe1; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + EDEB_ERR(4, "Cannot create NEQ."); + goto probe2; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + if (IS_ERR(ibpd)) { + EDEB_ERR(4, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe3; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + if (ret) { + EDEB_ERR(4, "Cannot create internal MR. ret=%x", ret); + goto probe4; + } + + ret = ehca_register_device(shca); + if (ret) { + EDEB_ERR(4, "Cannot register Infiniband device."); + goto probe5; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + EDEB_ERR(4, "Cannot create AQP1 for port 1."); + goto probe6; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + EDEB_ERR(4, "Cannot create AQP1 for port 2."); + goto probe7; + } + } + + ehca_create_device_sysfs(dev); + + spin_lock(&ehca_module.shca_lock); + list_add(&shca->shca_list, &ehca_module.shca_list); + spin_unlock(&ehca_module.shca_lock); + + EDEB_EX(7, "ret=%x", ret); + + return 0; + +probe7: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + EDEB_ERR(4, "Cannot destroy AQP1 for port 1. ret=%x", ret); + +probe6: + ib_unregister_device(&shca->ib_device); + +probe5: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + EDEB_ERR(4, "Cannot destroy internal MR. ret=%x", ret); + +probe4: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret != 0) + EDEB_ERR(4, "Cannot destroy internal PD. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret != 0) + EDEB_ERR(4, "Cannot destroy NEQ. ret=%x", ret); + +probe2: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret != 0) + EDEB_ERR(4, "Cannot destroy EQ. ret=%x", ret); + +probe1: + ib_dealloc_device(&shca->ib_device); + + EDEB_EX(4, "ret=%x", ret); + + return -EINVAL; +} + +static int __devexit ehca_remove(struct ibmebus_dev *dev) +{ + struct ehca_shca *shca = dev->ofdev.dev.driver_data; + int ret; + + EDEB_EN(7, "shca=%p", shca); + + ehca_remove_device_sysfs(dev); + + if (ehca_open_aqp1 == 1) { + int i; + + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret != 0) + EDEB_ERR(4, "Cannot destroy AQP1 for port %x." + " ret=%x", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + EDEB_ERR(4, "Cannot destroy internal MR. ret=%x", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + EDEB_ERR(4, "Cannot destroy internal PD. ret=%x", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + EDEB_ERR(4, "Cannot destroy EQ. ret=%x", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + EDEB_ERR(4, "Canot destroy NEQ. ret=%x", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock(&ehca_module.shca_lock); + list_del(&shca->shca_list); + spin_unlock(&ehca_module.shca_lock); + + EDEB_EX(7, "ret=%x", ret); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; + +static struct ibmebus_driver ehca_driver = { + .name = "ehca", + .id_table = ehca_device_table, + .probe = ehca_probe, + .remove = ehca_remove, +}; + +int __init ehca_module_init(void) +{ + int ret = 0; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Rel.: SVNEHCA_0008_GIT1)\n"); + EDEB_EN(7, ""); + + idr_init(&ehca_qp_idr); + idr_init(&ehca_cq_idr); + spin_lock_init(&ehca_qp_idr_lock); + spin_lock_init(&ehca_cq_idr_lock); + + INIT_LIST_HEAD(&ehca_module.shca_list); + spin_lock_init(&ehca_module.shca_lock); + + ehca_init_trace(); + + ehca_pool = ehca_create_comp_pool(); + if (ehca_pool == NULL) { + EDEB_ERR(4, "Cannot create comp pool."); + ret = -EINVAL; + goto module_init0; + } + + if ((ret = ehca_create_slab_caches(&ehca_module))) { + EDEB_ERR(4, "Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + if ((ret = ibmebus_register_driver(&ehca_driver))) { + EDEB_ERR(4, "Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init2; + } + + ehca_create_driver_sysfs(&ehca_driver); + + if (ehca_poll_all_eqs != 1) { + EDEB_ERR(4, "WARNING!!!"); + EDEB_ERR(4, "It is possible to lose interrupts."); + + return 0; + } + + init_timer(&ehca_module.timer); + ehca_module.timer.function = ehca_poll_eqs; + ehca_module.timer.data = (unsigned long)&ehca_module; + ehca_module.timer.expires = jiffies + HZ; + add_timer(&ehca_module.timer); + + goto module_init0; + +module_init2: + ehca_destroy_slab_caches(&ehca_module); + +module_init1: + ehca_destroy_comp_pool(ehca_pool); + +module_init0: + EDEB_EX(7, "ret=%x", ret); + + return ret; +}; + +void __exit ehca_module_exit(void) +{ + EDEB_EN(7, ""); + + if (ehca_poll_all_eqs == 1) + del_timer_sync(&ehca_module.timer); + + ehca_remove_driver_sysfs(&ehca_driver); + ibmebus_unregister_driver(&ehca_driver); + + if (ehca_destroy_slab_caches(&ehca_module) != 0) + EDEB_ERR(4, "Cannot destroy SLAB caches"); + + ehca_destroy_comp_pool(ehca_pool); + + idr_destroy(&ehca_cq_idr); + idr_destroy(&ehca_qp_idr); + + EDEB_EX(7, ""); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c new file mode 100644 index 0000000..5c5b024 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -0,0 +1,200 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi + * Waleri Fomin + * Reinhard Ernst + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "mcas" + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" + +#include "hcp_if.h" + +#define MAX_MC_LID 0xFFFE +#define MIN_MC_LID 0xC000 /* Multicast limits */ +#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) +#define EHCA_VALID_MULTICAST_LID(lid) (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) + +int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = NULL; + struct ehca_shca *shca = NULL; + union ib_gid my_gid; + u64 subnet_prefix; + u64 interface_id; + u64 h_ret = H_SUCCESS; + int ret = 0; + + EHCA_CHECK_ADR(ibqp); + EHCA_CHECK_ADR(gid); + + my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + + EHCA_CHECK_QP(my_qp); + if (ibqp->qp_type != IB_QPT_UD) { + EDEB_ERR(4, "invalid qp_type %x gid, ret=%x", + ibqp->qp_type, EINVAL); + return -EINVAL; + } + + shca = container_of(ibqp->pd->device, struct ehca_shca, ib_device); + EHCA_CHECK_ADR(shca); + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + EDEB_ERR(4, "gid is not valid mulitcast gid ret=%x", + EINVAL); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + EDEB_ERR(4, "lid=%x is not valid mulitcast lid ret=%x", + lid, EINVAL); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, + "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " + "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + } + ret = ehca2ib_return_code(h_ret); + + EDEB_EX(7, "mcast attach ret=%x\n" + "ehca_qp=%p qp_num=%x lid=%x\n" + "my_gid= %x %x %x %x\n" + " %x %x %x %x\n" + " %x %x %x %x\n" + " %x %x %x %x\n", + ret, my_qp, ibqp->qp_num, lid, + my_gid.raw[0], my_gid.raw[1], + my_gid.raw[2], my_gid.raw[3], + my_gid.raw[4], my_gid.raw[5], + my_gid.raw[6], my_gid.raw[7], + my_gid.raw[8], my_gid.raw[9], + my_gid.raw[10], my_gid.raw[11], + my_gid.raw[12], my_gid.raw[13], + my_gid.raw[14], my_gid.raw[15]); + + return ret; +} + +int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = NULL; + struct ehca_shca *shca = NULL; + union ib_gid my_gid; + u64 subnet_prefix; + u64 interface_id; + u64 h_ret = H_SUCCESS; + int ret = 0; + + EHCA_CHECK_ADR(ibqp); + EHCA_CHECK_ADR(gid); + + my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + + EHCA_CHECK_QP(my_qp); + if (ibqp->qp_type != IB_QPT_UD) { + EDEB_ERR(4, "invalid qp_type %x gid, ret=%x", + ibqp->qp_type, EINVAL); + return -EINVAL; + } + + shca = container_of(ibqp->pd->device, struct ehca_shca, ib_device); + EHCA_CHECK_ADR(shca); + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + EDEB_ERR(4, "gid is not valid mulitcast gid ret=%x", + EINVAL); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + EDEB_ERR(4, "lid=%x is not valid mulitcast lid ret=%x", + lid, EINVAL); + return -EINVAL; + } + + EDEB_EN(7, "dgid=%p qp_numl=%x lid=%x", + gid, ibqp->qp_num, lid); + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, + "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " + "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + } + ret = ehca2ib_return_code(h_ret); + + EDEB_EX(7, "mcast detach ret=%x\n" + "ehca_qp=%p qp_num=%x lid=%x\n" + "my_gid= %x %x %x %x\n" + " %x %x %x %x\n" + " %x %x %x %x\n" + " %x %x %x %x\n", + ret, my_qp, ibqp->qp_num, lid, + my_gid.raw[0], my_gid.raw[1], + my_gid.raw[2], my_gid.raw[3], + my_gid.raw[4], my_gid.raw[5], + my_gid.raw[6], my_gid.raw[7], + my_gid.raw[8], my_gid.raw[9], + my_gid.raw[10], my_gid.raw[11], + my_gid.raw[12], my_gid.raw[13], + my_gid.raw[14], my_gid.raw[15]); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c new file mode 100644 index 0000000..b7452ce --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -0,0 +1,2471 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#undef DEB_PREFIX +#define DEB_PREFIX "mrmw" + +#include + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +extern int ehca_use_hp_mr; + +static struct ehca_mr *ehca_mr_new(void) +{ + extern struct ehca_module ehca_module; + struct ehca_mr *me; + + me = kmem_cache_alloc(ehca_module.cache_mr, SLAB_KERNEL); + if (me) { + memset(me, 0, sizeof(struct ehca_mr)); + spin_lock_init(&me->mrlock); + EDEB_EX(7, "ehca_mr=%p sizeof(ehca_mr_t)=%x", me, + (u32) sizeof(struct ehca_mr)); + } else { + EDEB_ERR(3, "alloc failed"); + } + + return me; +} + +static void ehca_mr_delete(struct ehca_mr *me) +{ + extern struct ehca_module ehca_module; + + kmem_cache_free(ehca_module.cache_mr, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + extern struct ehca_module ehca_module; + struct ehca_mw *me; + + me = kmem_cache_alloc(ehca_module.cache_mw, SLAB_KERNEL); + if (me) { + memset(me, 0, sizeof(struct ehca_mw)); + spin_lock_init(&me->mwlock); + EDEB_EX(7, "ehca_mw=%p sizeof(ehca_mw_t)=%x", me, + (u32) sizeof(struct ehca_mw)); + } else { + EDEB_ERR(3, "alloc failed"); + } + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + extern struct ehca_module ehca_module; + + kmem_cache_free(ehca_module.cache_mw, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr = NULL; + int ret = 0; + struct ehca_mr *e_maxmr = NULL; + struct ehca_pd *e_pd = NULL; + struct ehca_shca *shca = NULL; + + EDEB_EN(7, "pd=%p mr_access_flags=%x", pd, mr_access_flags); + + EHCA_CHECK_PD_P(pd); + e_pd = container_of(pd, struct ehca_pd, ib_pd); + shca = container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + EDEB_ERR(4, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + EDEB_ERR(4, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + EDEB_EX(4, "rc=%lx pd=%p mr_access_flags=%x ", + PTR_ERR(ib_mr), pd, mr_access_flags); + else + EDEB_EX(7, "ib_mr=%p lkey=%x rkey=%x", + ib_mr, ib_mr->lkey, ib_mr->rkey); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *ib_mr = NULL; + int ret = 0; + struct ehca_mr *e_mr = NULL; + struct ehca_shca *shca = NULL; + struct ehca_pd *e_pd = NULL; + u64 size = 0; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_pages_mr = 0; + u32 num_pages_4k = 0; /* 4k portion "pages" */ + + EDEB_EN(7, "pd=%p phys_buf_array=%p num_phys_buf=%x " + "mr_access_flags=%x iova_start=%p", pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + + EHCA_CHECK_PD_P(pd); + if ((num_phys_buf <= 0) || ehca_adr_bad(phys_buf_array)) { + EDEB_ERR(4, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + EDEB_ERR(4, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + EDEB_ERR(4, "bad input values: size=%lx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_pd = container_of(pd, struct ehca_pd, ib_pd); + shca = container_of(pd->device, struct ehca_shca, ib_device); + + e_mr = ehca_mr_new(); + if (!e_mr) { + EDEB_ERR(4, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* determine number of MR pages */ + num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } else { + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = num_phys_buf; + pginfo.phys_buf_array = phys_buf_array; + pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + ib_mr = &e_mr->ib.ib_mr; + goto reg_phys_mr_exit0; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + EDEB_EX(4, "rc=%lx pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + else + EDEB_EX(7, "ib_mr=%p lkey=%x rkey=%x", + ib_mr, ib_mr->lkey, ib_mr->rkey); + return ib_mr; +} /* end ehca_reg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, + struct ib_udata *udata) +{ + struct ib_mr *ib_mr = NULL; + struct ehca_mr *e_mr = NULL; + struct ehca_shca *shca = NULL; + struct ehca_pd *e_pd = NULL; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + int ret = 0; + u32 num_pages_mr = 0; + u32 num_pages_4k = 0; /* 4k portion "pages" */ + + EDEB_EN(7, "pd=%p region=%p mr_access_flags=%x udata=%p", + pd, region, mr_access_flags, udata); + + EHCA_CHECK_PD_P(pd); + if (ehca_adr_bad(region)) { + EDEB_ERR(4, "bad input values: region=%p", region); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + EDEB_ERR(4, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + EDEB(7, "user_base=%lx virt_base=%lx length=%lx offset=%x page_size=%x " + "chunk_list.next=%p", + region->user_base, region->virt_base, region->length, + region->offset, region->page_size, region->chunk_list.next); + if (region->page_size != PAGE_SIZE) { + EDEB_ERR(4, "page size not supported, region->page_size=%x", + region->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + if ((region->length == 0) || + ((region->virt_base + region->length) < region->virt_base)) { + EDEB_ERR(4, "bad input values: length=%lx virt_base=%lx", + region->length, region->virt_base); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + e_pd = container_of(pd, struct ehca_pd, ib_pd); + shca = container_of(pd->device, struct ehca_shca, ib_device); + + e_mr = ehca_mr_new(); + if (!e_mr) { + EDEB_ERR(4, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_user_mr_exit0; + } + + /* determine number of MR pages */ + num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + /* register MR on HCA */ + pginfo.type = EHCA_MR_PGI_USER; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.region = region; + pginfo.next_4k = region->offset / EHCA_PAGESIZE; + pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, + (®ion->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, + region->length, mr_access_flags, e_pd, &pginfo, + &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_user_mr_exit1; + } + + /* successful registration of all pages */ + ib_mr = &e_mr->ib.ib_mr; + goto reg_user_mr_exit0; + +reg_user_mr_exit1: + ehca_mr_delete(e_mr); +reg_user_mr_exit0: + if (IS_ERR(ib_mr)) + EDEB_EX(4, "rc=%lx pd=%p region=%p mr_access_flags=%x " + "udata=%p", + PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); + else + EDEB_EX(7, "ib_mr=%p lkey=%x rkey=%x", + ib_mr, ib_mr->lkey, ib_mr->rkey); + return ib_mr; +} /* end ehca_reg_user_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret = 0; + struct ehca_shca *shca = NULL; + struct ehca_mr *e_mr = NULL; + u64 new_size = 0; + u64 *new_start = NULL; + u32 new_acl = 0; + struct ehca_pd *new_pd = NULL; + u32 tmp_lkey = 0; + u32 tmp_rkey = 0; + unsigned long sl_flags; + u32 num_pages_mr = 0; + u32 num_pages_4k = 0; /* 4k portion "pages" */ + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + + EDEB_EN(7, "mr=%p mr_rereg_mask=%x pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + mr, mr_rereg_mask, pd, phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + EHCA_CHECK_MR(mr); + my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + EDEB_ERR(4, "rereg without IB_MR_REREG_TRANS not supported yet," + " mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + if (mr_rereg_mask & IB_MR_REREG_PD) { + EHCA_CHECK_PD(pd); + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + shca = container_of(mr->device, struct ehca_shca, ib_device); + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + EDEB_ERR(3, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + EDEB_ERR(4, "not supported for FMR, mr=%p flags=%x", + mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (ehca_adr_bad(phys_buf_array) || num_phys_buf <= 0) { + EDEB_ERR(4, "bad input values: mr_rereg_mask=%x " + "phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + EDEB_ERR(4, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; /* new == old address */ + new_size = e_mr->size; /* new == old length */ + new_acl = e_mr->acl; /* new == old access control */ + new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + EDEB_ERR(4, "bad input values: new_size=%lx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = num_phys_buf; + pginfo.phys_buf_array = phys_buf_array; + pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + EDEB(7, "mr=%p new_start=%p new_size=%lx new_acl=%x new_pd=%p " + "num_pages_mr=%x num_pages_4k=%x", e_mr, new_start, new_size, + new_acl, new_pd, num_pages_mr, num_pages_4k); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + EDEB_EX(4, "ret=%x mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + else + EDEB_EX(7, "mr=%p mr_rereg_mask=%x pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + mr, mr_rereg_mask, pd, phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + struct ehca_shca *shca = NULL; + struct ehca_mr *e_mr = NULL; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + EDEB_EN(7, "mr=%p mr_attr=%p", mr, mr_attr); + + EHCA_CHECK_MR(mr); + + my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto query_mr_exit0; + } + + e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + if (ehca_adr_bad(mr_attr)) { + EDEB_ERR(4, "bad input values: mr_attr=%p", mr_attr); + ret = -EINVAL; + goto query_mr_exit0; + } + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + EDEB_ERR(4, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + shca = container_of(mr->device, struct ehca_shca, ib_device); + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_mr_query failed, h_ret=%lx mr=%p " + "hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca_mrmw_map_hrc_query_mr(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + EDEB_EX(4, "ret=%x mr=%p mr_attr=%p", ret, mr, mr_attr); + else + EDEB_EX(7, "pd=%p device_virt_addr=%lx size=%lx " + "mr_access_flags=%x lkey=%x rkey=%x", + mr_attr->pd, mr_attr->device_virt_addr, + mr_attr->size, mr_attr->mr_access_flags, + mr_attr->lkey, mr_attr->rkey); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + struct ehca_shca *shca = NULL; + struct ehca_mr *e_mr = NULL; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + + EDEB_EN(7, "mr=%p", mr); + + EHCA_CHECK_MR(mr); + my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + shca = container_of(mr->device, struct ehca_shca, ib_device); + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + EDEB_ERR(4, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + EDEB_ERR(3, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_free_mr failed, h_ret=%lx shca=%p e_mr=%p" + " hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto dereg_mr_exit0; + } + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + EDEB_EX(4, "ret=%x mr=%p", ret, mr); + else + EDEB_EX(7, ""); + return ret; +} /* end ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +{ + struct ib_mw *ib_mw = NULL; + u64 h_ret = H_SUCCESS; + struct ehca_shca *shca = NULL; + struct ehca_mw *e_mw = NULL; + struct ehca_pd *e_pd = NULL; + struct ehca_mw_hipzout_parms hipzout = {{0},0}; + + EDEB_EN(7, "pd=%p", pd); + + EHCA_CHECK_PD_P(pd); + e_pd = container_of(pd, struct ehca_pd, ib_pd); + shca = container_of(pd->device, struct ehca_shca, ib_device); + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_mw_allocate failed, h_ret=%lx shca=%p " + "hca_hndl=%lx mw=%p", h_ret, shca, + shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + ib_mw = &e_mw->ib_mw; + goto alloc_mw_exit0; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + EDEB_EX(4, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd); + else + EDEB_EX(7, "ib_mw=%p rkey=%x", ib_mw, ib_mw->rkey); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + int ret = 0; + + /* TODO: not supported up to now */ + EDEB_ERR(4, "bind MW currently not supported by HCAD"); + ret = -EPERM; + goto bind_mw_exit0; + +bind_mw_exit0: + if (ret) + EDEB_EX(4, "ret=%x qp=%p mw=%p mw_bind=%p", + ret, qp, mw, mw_bind); + else + EDEB_EX(7, "qp=%p mw=%p mw_bind=%p", qp, mw, mw_bind); + return ret; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + struct ehca_shca *shca = NULL; + struct ehca_mw *e_mw = NULL; + + EDEB_EN(7, "mw=%p", mw); + + EHCA_CHECK_MW(mw); + e_mw = container_of(mw, struct ehca_mw, ib_mw); + shca = container_of(mw->device, struct ehca_shca, ib_device); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_free_mw failed, h_ret=%lx shca=%p mw=%p " + "rkey=%x hca_hndl=%lx mw_hndl=%lx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + ret = ehca_mrmw_map_hrc_free_mw(h_ret); + goto dealloc_mw_exit0; + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + +dealloc_mw_exit0: + if (ret) + EDEB_EX(4, "ret=%x mw=%p", ret, mw); + else + EDEB_EX(7, ""); + return ret; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr = NULL; + struct ehca_shca *shca = NULL; + struct ehca_mr *e_fmr = NULL; + int ret = 0; + struct ehca_pd *e_pd = NULL; + u32 tmp_lkey = 0; + u32 tmp_rkey = 0; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + + EDEB_EN(7, "pd=%p mr_access_flags=%x fmr_attr=%p", + pd, mr_access_flags, fmr_attr); + + EHCA_CHECK_PD_P(pd); + if (ehca_adr_bad(fmr_attr)) { + EDEB_ERR(4, "bad input values: fmr_attr=%p", fmr_attr); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + EDEB(7, "max_pages=%x max_maps=%x page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, fmr_attr->page_shift); + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + EDEB_ERR(4, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + EDEB_ERR(4, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + EDEB_ERR(4, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) && + ((1 << fmr_attr->page_shift) != PAGE_SIZE)) { + EDEB_ERR(4, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_pd = container_of(pd, struct ehca_pd, ib_pd); + shca = container_of(pd->device, struct ehca_shca, ib_device); + + e_fmr = ehca_mr_new(); + if (!e_fmr) { + ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + ib_fmr = &e_fmr->ib.ib_fmr; + goto alloc_fmr_exit0; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + if (IS_ERR(ib_fmr)) + EDEB_EX(4, "rc=%lx pd=%p mr_access_flags=%x " + "fmr_attr=%p", PTR_ERR(ib_fmr), pd, + mr_access_flags, fmr_attr); + else + EDEB_EX(7, "ib_fmr=%p tmp_lkey=%x tmp_rkey=%x", + ib_fmr, tmp_lkey, tmp_rkey); + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret = 0; + struct ehca_shca *shca = NULL; + struct ehca_mr *e_fmr = NULL; + struct ehca_pd *e_pd = NULL; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 tmp_lkey = 0; + u32 tmp_rkey = 0; + + EDEB_EN(7, "fmr=%p page_list=%p list_len=%x iova=%lx", + fmr, page_list, list_len, iova); + + EHCA_CHECK_FMR(fmr); + e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(fmr->device, struct ehca_shca, ib_device); + e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + EDEB_ERR(4, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + EDEB_ERR(4, "bad iova, iova=%lx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + EDEB(6, "map limit exceeded, fmr=%p e_fmr->fmr_map_cnt=%x " + "e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_pages = list_len; + pginfo.page_list = page_list; + pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / + EHCA_PAGESIZE); + + ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + +map_phys_fmr_exit0: + if (ret) + EDEB_EX(4, "ret=%x fmr=%p page_list=%p list_len=%x iova=%lx", + ret, fmr, page_list, list_len, iova); + else + EDEB_EX(7, "lkey=%x rkey=%x", + e_fmr->ib.ib_fmr.lkey, e_fmr->ib.ib_fmr.rkey); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr = NULL; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca = NULL; + struct ehca_mr *e_fmr = NULL; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + EDEB_EN(7, "fmr_list=%p", fmr_list); + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + EHCA_CHECK_FMR(ib_fmr); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + EDEB_ERR(4, "SHCA mismatch, shca=%p prev_shca=%p " + "e_fmr=%p", shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + EDEB_ERR(4, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + EDEB_ERR(4, "unmap of one FMR failed, stop rest, " + "e_fmr=%p num_fmr=%x unmap_fmr_cnt=%x lkey=%x", + e_fmr, num_fmr, unmap_fmr_cnt, + e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + EDEB_EX(4, "ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + else + EDEB_EX(7, "num_fmr=%x", num_fmr); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + struct ehca_shca *shca = NULL; + struct ehca_mr *e_fmr = NULL; + + EDEB_EN(7, "fmr=%p", fmr); + + EHCA_CHECK_FMR(fmr); + e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(fmr->device, struct ehca_shca, ib_device); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + EDEB_ERR(4, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_free_mr failed, h_ret=%lx e_fmr=%p " + "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ehca_mrmw_map_hrc_free_mr(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + +free_fmr_exit0: + if (ret) + EDEB_EX(4, "ret=%x fmr=%p", ret, fmr); + else + EDEB_EX(7, ""); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + u32 hipz_acl = 0; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + EDEB_EN(7, "shca=%p e_mr=%p iova_start=%p size=%lx acl=%x e_pd=%p " + "pginfo=%p num_pages=%lx num_4k=%lx", shca, e_mr, iova_start, + size, acl, e_pd, pginfo, pginfo->num_pages, pginfo->num_4k); + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_alloc_mr failed, h_ret=%lx hca_hndl=%lx", + h_ret, shca->ipz_hca_handle.handle); + ret = ehca_mrmw_map_hrc_alloc(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_pages = pginfo->num_pages; + e_mr->num_4k = pginfo->num_4k; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + goto ehca_reg_mr_exit0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + EDEB_ERR(1, "h_ret=%lx shca=%p e_mr=%p iova_start=%p " + "size=%lx acl=%x e_pd=%p lkey=%x pginfo=%p " + "num_pages=%lx num_4k=%lx ret=%x", h_ret, shca, e_mr, + iova_start, size, acl, e_pd, hipzout.lkey, pginfo, + pginfo->num_pages, pginfo->num_4k, ret); + EDEB_ERR(1, "internal error in ehca_reg_mr, not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + EDEB_EX(4, "ret=%x shca=%p e_mr=%p iova_start=%p size=%lx " + "acl=%x e_pd=%p pginfo=%p num_pages=%lx num_4k=%lx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_pages, pginfo->num_4k); + else + EDEB_EX(7, "ret=%x lkey=%x rkey=%x", ret, *lkey, *rkey); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + u32 rnum = 0; + u64 rpage = 0; + u32 i; + u64 *kpage = NULL; + + EDEB_EN(7, "shca=%p e_mr=%p pginfo=%p num_pages=%lx num_4k=%lx", + shca, e_mr, pginfo, pginfo->num_pages, pginfo->num_4k); + + kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!kpage) { + EDEB_ERR(4, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max 512 pages per shot */ + for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + + if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + rnum = pginfo->num_4k % 512; /* last shot */ + if (rnum == 0) + rnum = 512; /* last shot is full */ + } else + rnum = 512; + + if (rnum > 1) { + ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); + if (ret) { + EDEB_ERR(4, "ehca_set_pagebuf bad rc, ret=%x " + "rnum=%x kpage=%p", ret, rnum, kpage); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + EDEB_ERR(4, "kpage=%p i=%x", kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else { /* rnum==1 */ + ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); + if (ret) { + EDEB_ERR(4, "ehca_set_pagebuf_1 bad rc, " + "ret=%x i=%x", ret, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } + + EDEB(9, "i=%x rnum=%x rpage=%lx", i, rnum, rpage); + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, + 0, /* pagesize 4k */ + 0, rpage, rnum); + + if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "last hipz_reg_rpage_mr failed, " + "h_ret=%lx e_mr=%p i=%x hca_hndl=%lx " + "mr_hndl=%lx lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + EDEB_ERR(4, "hipz_reg_rpage_mr failed, h_ret=%lx " + "e_mr=%p i=%x lkey=%x hca_hndl=%lx " + "mr_hndl=%lx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + kfree(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + EDEB_EX(4, "ret=%x shca=%p e_mr=%p pginfo=%p num_pages=%lx " + "num_4k=%lx", ret, shca, e_mr, pginfo, + pginfo->num_pages, pginfo->num_4k); + else + EDEB_EX(7, "ret=%x", ret); + return ret; +} /* end ehca_reg_mr_rpages() */ + +/*----------------------------------------------------------------------*/ + +inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + u32 acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + u32 hipz_acl = 0; + u64 *kpage = NULL; + u64 rpage = 0; + struct ehca_mr_pginfo pginfo_save; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + EDEB_EN(7, "shca=%p e_mr=%p iova_start=%p size=%lx acl=%x " + "e_pd=%p pginfo=%p num_pages=%lx num_4k=%lx", shca, e_mr, + iova_start, size, acl, e_pd, pginfo, pginfo->num_pages, + pginfo->num_4k); + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!kpage) { + EDEB_ERR(4, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_rereg_mr_rereg1_exit0; + } + + pginfo_save = *pginfo; + ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + if (ret) { + EDEB_ERR(4, "set pagebuf failed, e_mr=%p pginfo=%p type=%x " + "num_pages=%lx num_4k=%lx kpage=%p", e_mr, pginfo, + pginfo->type, pginfo->num_pages, pginfo->num_4k,kpage); + goto ehca_rereg_mr_rereg1_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + EDEB_ERR(4, "kpage=%p", kpage); + ret = -EFAULT; + goto ehca_rereg_mr_rereg1_exit1; + } + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, rpage, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * reregistration unsuccessful, try it again with the 3 hCalls, + * e.g. this is required in case H_MR_CONDITION + * (MW bound or MR is shared) + */ + EDEB(6, "hipz_h_reregister_pmr failed (Rereg1), h_ret=%lx " + "e_mr=%p", h_ret, e_mr); + *pginfo = pginfo_save; + ret = -EAGAIN; + } else if ((u64*)hipzout.vaddr != iova_start) { + EDEB_ERR(4, "PHYP changed iova_start in rereg_pmr, " + "iova_start=%p iova_start_out=%lx e_mr=%p " + "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, + hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey, hipzout.lkey); + ret = -EFAULT; + } else { + /* + * successful reregistration + * note: start and start_out are identical for eServer HCAs + */ + e_mr->num_pages = pginfo->num_pages; + e_mr->num_4k = pginfo->num_4k; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + } + +ehca_rereg_mr_rereg1_exit1: + kfree(kpage); +ehca_rereg_mr_rereg1_exit0: + if ( ret && (ret != -EAGAIN) ) + EDEB_EX(4, "ret=%x h_ret=%lx lkey=%x rkey=%x pginfo=%p " + "num_pages=%lx num_4k=%lx", ret, h_ret, *lkey, *rkey, + pginfo, pginfo->num_pages, pginfo->num_4k); + else + EDEB_EX(7, "ret=%x h_ret=%lx lkey=%x rkey=%x pginfo=%p " + "num_pages=%lx num_4k=%lx", ret, h_ret, *lkey, *rkey, + pginfo, pginfo->num_pages, pginfo->num_4k); + return ret; +} /* end ehca_rereg_mr_rereg1() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + EDEB_EN(7, "shca=%p e_mr=%p iova_start=%p size=%lx acl=%x " + "e_pd=%p pginfo=%p num_pages=%lx num_4k=%lx", shca, e_mr, + iova_start, size, acl, e_pd, pginfo, pginfo->num_pages, + pginfo->num_4k); + + /* first determine reregistration hCall(s) */ + if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || + (pginfo->num_4k > e_mr->num_4k)) { + EDEB(7, "Rereg3 case, pginfo->num_4k=%lx " + "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + EDEB(4, "Rereg MR for max-MR! e_mr=%p", e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_free_mr failed, h_ret=%lx e_mr=%p " + "hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + EDEB_EX(4, "ret=%x shca=%p e_mr=%p iova_start=%p size=%lx " + "acl=%x e_pd=%p pginfo=%p num_pages=%lx lkey=%x rkey=%x" + " rereg_1_hcall=%x rereg_3_hcall=%x", ret, shca, e_mr, + iova_start, size, acl, e_pd, pginfo, pginfo->num_pages, + *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); + else + EDEB_EX(7, "ret=%x shca=%p e_mr=%p iova_start=%p size=%lx " + "acl=%x e_pd=%p pginfo=%p num_pages=%lx lkey=%x rkey=%x" + " rereg_1_hcall=%x rereg_3_hcall=%x", ret, shca, e_mr, + iova_start, size, acl, e_pd, pginfo, pginfo->num_pages, + *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); + + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ + struct ehca_pd *e_pd = NULL; + struct ehca_mr save_fmr; + u32 tmp_lkey = 0; + u32 tmp_rkey = 0; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + EDEB_EN(7, "shca=%p e_fmr=%p", shca, e_fmr); + + /* first check if reregistration hCall can be used for unmap */ + if (e_fmr->fmr_max_pages > 512) { + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + e_pd = container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + + if (rereg_1_hcall) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + EDEB_ERR(4, "hipz_reregister_pmr failed (Rereg1), " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x lkey_out=%x", h_ret, e_fmr, + shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + rereg_3_hcall = 1; + } else { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_free_mr failed, h_ret=%lx e_fmr=%p " + "hca_hndl=%lx mr_hndl=%lx lkey=%x", h_ret, + e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_pages = 0; + pginfo.num_4k = 0; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_unmap_one_fmr_exit0; + } + } + +ehca_unmap_one_fmr_exit0: + EDEB_EX(7, "ret=%x tmp_lkey=%x tmp_rkey=%x fmr_max_pages=%x " + "rereg_1_hcall=%x rereg_3_hcall=%x", ret, tmp_lkey, tmp_rkey, + e_fmr->fmr_max_pages, rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + u32 hipz_acl = 0; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + EDEB_EN(7,"shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x e_pd=%p", + shca, e_origmr, e_newmr, iova_start, acl, e_pd); + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_reg_smr failed, h_ret=%lx shca=%p e_origmr=%p" + " e_newmr=%p iova_start=%p acl=%x e_pd=%p hca_hndl=%lx" + " mr_hndl=%lx lkey=%x", h_ret, shca, e_origmr, e_newmr, + iova_start, acl, e_pd, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + goto ehca_reg_smr_exit0; + } + /* successful registration */ + e_newmr->num_pages = e_origmr->num_pages; + e_newmr->num_4k = e_origmr->num_4k; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + goto ehca_reg_smr_exit0; + +ehca_reg_smr_exit0: + if (ret) + EDEB_EX(4, "ret=%x shca=%p e_origmr=%p e_newmr=%p " + "iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + else + EDEB_EX(7, "ret=%x lkey=%x rkey=%x", ret, *lkey, *rkey); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret = 0; + struct ehca_mr *e_mr = NULL; + u64 *iova_start = NULL; + u64 size_maxmr = 0; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ib_phys_buf ib_pbuf; + u32 num_pages_mr = 0; + u32 num_pages_4k = 0; /* 4k portion "pages" */ + + EDEB_EN(7, "shca=%p e_pd=%p e_maxmr=%p", shca, e_pd, e_maxmr); + + if (ehca_adr_bad(shca) || ehca_adr_bad(e_pd) || ehca_adr_bad(e_maxmr)) { + EDEB_ERR(4, "bad input values: shca=%p e_pd=%p e_maxmr=%p", + shca, e_pd, e_maxmr); + ret = -EINVAL; + goto ehca_reg_internal_maxmr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + EDEB_ERR(4, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = (u64)high_memory - PAGE_OFFSET; + EDEB(7, "high_memory=%p PAGE_OFFSET=%lx", high_memory, PAGE_OFFSET); + iova_start = (u64*)KERNELBASE; + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = 1; + pginfo.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + EDEB_ERR(4, "reg of internal max MR failed, e_mr=%p " + "iova_start=%p size_maxmr=%lx num_pages_mr=%x " + "num_pages_4k=%x", e_mr, iova_start, size_maxmr, + num_pages_mr, num_pages_4k); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + goto ehca_reg_internal_maxmr_exit0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + EDEB_EX(4, "ret=%x shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + else + EDEB_EX(7, "*e_maxmr=%p lkey=%x rkey=%x", + *e_maxmr, (*e_maxmr)->ib.ib_mr.lkey, + (*e_maxmr)->ib.ib_mr.rkey); + return ret; +} /* end ehca_reg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl = 0; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + EDEB_EN(7,"shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x e_pd=%p", + shca, e_origmr, e_newmr, iova_start, acl, e_pd); + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_reg_smr failed, h_ret=%lx e_origmr=%p " + "hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + goto ehca_reg_maxmr_exit0; + } + /* successful registration */ + e_newmr->num_pages = e_origmr->num_pages; + e_newmr->num_4k = e_origmr->num_4k; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + +ehca_reg_maxmr_exit0: + EDEB_EX(7, "ret=%x lkey=%x rkey=%x", ret, *lkey, *rkey); + return ret; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret = 0; + struct ehca_mr *e_maxmr = NULL; + struct ib_pd *ib_pd = NULL; + + EDEB_EN(7, "shca=%p shca->maxmr=%p", shca, shca->maxmr); + + if (!shca->maxmr) { + EDEB_ERR(4, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + EDEB_ERR(3, "dereg internal max-MR failed, " + "ret=%x e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + EDEB_EX(4, "ret=%x shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + else + EDEB_EX(7, ""); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check physical buffer array of MR verbs for validness and + * calculates MR size + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf == 0) { + EDEB_ERR(4, "bad phys buf array len, num_phys_buf=0"); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { + EDEB_ERR(4, "iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%lx pbuf->size=%lx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + EDEB_ERR(4, "addr/size mismatch in 1st buf, pbuf->addr=%lx " + "pbuf->size=%lx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + EDEB_ERR(4, "bad address, i=%x pbuf->addr=%lx " + "pbuf->size=%lx", i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + EDEB_ERR(4, "bad size, i=%x pbuf->size=%lx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validness */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page = NULL; + + if (ehca_adr_bad(page_list)) { + EDEB_ERR(4, "bad page_list, page_list=%p fmr=%p", + page_list, e_fmr); + return -EINVAL; + } + + if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { + EDEB_ERR(4, "bad list_len, list_len=%x e_fmr->fmr_max_pages=%x " + "fmr=%p", list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + EDEB_ERR(4, "bad page, i=%x *page=%lx page=%p " + "fmr=%p fmr_page_size=%x", + i, *page, page, e_fmr, e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk = NULL; + struct ib_umem_chunk *chunk = NULL; + struct ib_phys_buf *pbuf = NULL; + u64 *fmrlist = NULL; + u64 num4k = 0; + u64 pgaddr = 0; + u64 offs4k = 0; + u32 i = 0; + u32 j = 0; + + EDEB_EN(7, "pginfo=%p type=%x num_pages=%lx num_4k=%lx next_buf=%lx " + "next_4k=%lx number=%x kpage=%p page_cnt=%lx page_4k_cnt=%lx " + "next_listelem=%lx region=%p next_chunk=%p next_nmap=%lx", + pginfo, pginfo->type, pginfo->num_pages, pginfo->num_4k, + pginfo->next_buf, pginfo->next_4k, number, kpage, + pginfo->page_cnt, pginfo->page_4k_cnt, pginfo->next_listelem, + pginfo->region, pginfo->next_chunk, pginfo->next_nmap); + + if (pginfo->type == EHCA_MR_PGI_PHYS) { + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->phys_buf_array + pginfo->next_buf; + num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; + offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + while (pginfo->next_4k < offs4k + num4k) { + /* sanity check */ + if ((pginfo->page_cnt >= pginfo->num_pages) || + (pginfo->page_4k_cnt >= pginfo->num_4k)) { + EDEB_ERR(4, "page_cnt >= num_pages, " + "page_cnt=%lx num_pages=%lx " + "page_4k_cnt=%lx num_4k=%lx " + "i=%x", pginfo->page_cnt, + pginfo->num_pages, + pginfo->page_4k_cnt, + pginfo->num_4k, i); + ret = -EFAULT; + } + *kpage = phys_to_abs( + (pbuf->addr & (EHCA_PAGESIZE-1)) + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*kpage) && pbuf->addr ) { + EDEB_ERR(4, "pbuf->addr=%lx " + "pbuf->size=%lx next_4k=%lx", + pbuf->addr, pbuf->size, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->page_cnt)++; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_4k >= offs4k + num4k) { + (pginfo->next_buf)++; + pginfo->next_4k = 0; + } + } + } else if (pginfo->type == EHCA_MR_PGI_USER) { + /* loop over desired chunk entries */ + chunk = pginfo->next_chunk; + prev_chunk = pginfo->next_chunk; + list_for_each_entry_continue(chunk, + (&(pginfo->region->chunk_list)), + list) { + EDEB(9, "chunk->page_list[0]=%lx", + (u64)sg_dma_address(&chunk->page_list[0])); + for (i = pginfo->next_nmap; i < chunk->nmap; ) { + pgaddr = ( page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ); + *kpage = phys_to_abs(pgaddr + + (pginfo->next_4k * + EHCA_PAGESIZE)); + EDEB(9,"pgaddr=%lx *kpage=%lx next_4k=%lx", + pgaddr, *kpage, pginfo->next_4k); + if ( !(*kpage) ) { + EDEB_ERR(4, "pgaddr=%lx " + "chunk->page_list[i]=%lx i=%x " + "next_4k=%lx mr=%p", pgaddr, + (u64)sg_dma_address( + &chunk->page_list[i]), + i, pginfo->next_4k, e_mr); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + kpage++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_nmap)++; + pginfo->next_4k = 0; + i++; + } + j++; + if (j >= number) break; + } + if ((pginfo->next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->next_nmap >= chunk->nmap) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->region->chunk_list)), + list); + } else if (pginfo->type == EHCA_MR_PGI_FMR) { + /* loop over desired page_list entries */ + fmrlist = pginfo->page_list + pginfo->next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & (EHCA_PAGESIZE-1)) + + pginfo->next_4k * EHCA_PAGESIZE); + if ( !(*kpage) ) { + EDEB_ERR(4, "*fmrlist=%lx fmrlist=%p " + "next_listelem=%lx next_4k=%lx", + *fmrlist, fmrlist, + pginfo->next_listelem,pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + kpage++; + if (pginfo->next_4k % + (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_listelem)++; + fmrlist++; + pginfo->next_4k = 0; + } + } + } else { + EDEB_ERR(4, "bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + +ehca_set_pagebuf_exit0: + if (ret) + EDEB_EX(4, "ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " + "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " + "next_listelem=%lx region=%p next_chunk=%p " + "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, + pginfo->num_pages, pginfo->num_4k, pginfo->next_buf, + pginfo->next_4k, number, kpage, pginfo->page_cnt, + pginfo->page_4k_cnt, i, pginfo->next_listelem, + pginfo->region, pginfo->next_chunk, pginfo->next_nmap); + else + EDEB_EX(7, "ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " + "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " + "next_listelem=%lx region=%p next_chunk=%p " + "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, + pginfo->num_pages, pginfo->num_4k, pginfo->next_buf, + pginfo->next_4k, number, kpage, pginfo->page_cnt, + pginfo->page_4k_cnt, i, pginfo->next_listelem, + pginfo->region, pginfo->next_chunk, pginfo->next_nmap); + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* setup 1 page from page info page buffer */ +int ehca_set_pagebuf_1(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u64 *rpage) +{ + int ret = 0; + struct ib_phys_buf *tmp_pbuf = NULL; + u64 *fmrlist = NULL; + struct ib_umem_chunk *chunk = NULL; + struct ib_umem_chunk *prev_chunk = NULL; + u64 pgaddr = 0; + u64 num4k = 0; + u64 offs4k = 0; + + EDEB_EN(7, "pginfo=%p type=%x num_pages=%lx num_4k=%lx next_buf=%lx " + "next_4k=%lx rpage=%p page_cnt=%lx page_4k_cnt=%lx " + "next_listelem=%lx region=%p next_chunk=%p next_nmap=%lx", + pginfo, pginfo->type, pginfo->num_pages, pginfo->num_4k, + pginfo->next_buf, pginfo->next_4k, rpage, pginfo->page_cnt, + pginfo->page_4k_cnt, pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + + if (pginfo->type == EHCA_MR_PGI_PHYS) { + /* sanity check */ + if ((pginfo->page_cnt >= pginfo->num_pages) || + (pginfo->page_4k_cnt >= pginfo->num_4k)) { + EDEB_ERR(4, "page_cnt >= num_pages, page_cnt=%lx " + "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", + pginfo->page_cnt, pginfo->num_pages, + pginfo->page_4k_cnt, pginfo->num_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; + num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; + offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + *rpage = phys_to_abs((tmp_pbuf->addr & (EHCA_PAGESIZE-1)) + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*rpage) && tmp_pbuf->addr ) { + EDEB_ERR(4, "tmp_pbuf->addr=%lx" + " tmp_pbuf->size=%lx next_4k=%lx", + tmp_pbuf->addr, tmp_pbuf->size, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->page_cnt)++; + if (pginfo->next_4k >= offs4k + num4k) { + (pginfo->next_buf)++; + pginfo->next_4k = 0; + } + } else if (pginfo->type == EHCA_MR_PGI_USER) { + chunk = pginfo->next_chunk; + prev_chunk = pginfo->next_chunk; + list_for_each_entry_continue(chunk, + (&(pginfo->region->chunk_list)), + list) { + pgaddr = ( page_to_pfn(chunk->page_list[ + pginfo->next_nmap].page) + << PAGE_SHIFT); + *rpage = phys_to_abs(pgaddr + + (pginfo->next_4k * EHCA_PAGESIZE)); + EDEB(9,"pgaddr=%lx *rpage=%lx next_4k=%lx", pgaddr, + *rpage, pginfo->next_4k); + if ( !(*rpage) ) { + EDEB_ERR(4, "pgaddr=%lx chunk->page_list[]=%lx " + "next_nmap=%lx next_4k=%lx mr=%p", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[ + pginfo->next_nmap]), + pginfo->next_nmap, pginfo->next_4k, + e_mr); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_nmap)++; + pginfo->next_4k = 0; + } + if (pginfo->next_nmap >= chunk->nmap) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + } + break; + } + pginfo->next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->region->chunk_list)), + list); + } else if (pginfo->type == EHCA_MR_PGI_FMR) { + fmrlist = pginfo->page_list + pginfo->next_listelem; + *rpage = phys_to_abs((*fmrlist & (EHCA_PAGESIZE-1)) + + pginfo->next_4k * EHCA_PAGESIZE); + if ( !(*rpage) ) { + EDEB_ERR(4, "*fmrlist=%lx fmrlist=%p next_listelem=%lx " + "next_4k=%lx", *fmrlist, fmrlist, + pginfo->next_listelem, pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_listelem)++; + pginfo->next_4k = 0; + } + } else { + EDEB_ERR(4, "bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + +ehca_set_pagebuf_1_exit0: + if (ret) + EDEB_EX(4, "ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " + "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " + "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, + pginfo, pginfo->type, pginfo->num_pages, pginfo->num_4k, + pginfo->next_buf, pginfo->next_4k, rpage, + pginfo->page_cnt, pginfo->page_4k_cnt, + pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + else + EDEB_EX(7, "ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " + "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " + "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, + pginfo, pginfo->type, pginfo->num_pages, pginfo->num_4k, + pginfo->next_buf, pginfo->next_4k, rpage, + pginfo->page_cnt, pginfo->page_4k_cnt, + pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + return ret; +} /* end ehca_set_pagebuf_1() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ((u64)high_memory - PAGE_OFFSET)) && + (iova_start == (void*)KERNELBASE)) { + EDEB(6, "this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. */ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/ +{ + return; /* HCA supports only 4k */ +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. + */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR/MW allocations + * Used for hipz_mr_reg_alloc and hipz_mw_alloc. + */ +int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RT_PARM: /* invalid resource type */ + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_MLENGTH_PARM: /* invalid memory length */ + case H_MEM_ACCESS_PARM: /* invalid access controls */ + case H_CONSTRAINED: /* resource constraint */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_alloc() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR register rpage + * Used for hipz_h_register_rpage_mr at registering last page + */ +int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* registration complete */ + return 0; + case H_PAGE_REGISTERED: /* page registered */ + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ +/* case H_QT_PARM: invalid queue type */ + case H_PARAMETER: /* + * invalid logical address, + * or count zero or greater 512 + */ + case H_TABLE_FULL: /* page table full */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_rrpg_last() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR register rpage + * Used for hipz_h_register_rpage_mr at registering one page, but not last page + */ +int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_PAGE_REGISTERED: /* page registered */ + return 0; + case H_SUCCESS: /* registration complete */ + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ +/* case H_QT_PARM: invalid queue type */ + case H_PARAMETER: /* + * invalid logical address, + * or count zero or greater 512 + */ + case H_TABLE_FULL: /* page table full */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ + +/*----------------------------------------------------------------------*/ + +/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ +int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_query_mr() */ + +/*----------------------------------------------------------------------*/ +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for freeing MR resource + * Used for hipz_h_free_resource_mr + */ +int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* resource freed */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_R_STATE: /* invalid resource state */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_free_mr() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for freeing MW resource + * Used for hipz_h_free_resource_mw + */ +int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* resource freed */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_R_STATE: /* invalid resource state */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_free_mw() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for SMR registrations + * Used for hipz_h_register_smr. + */ +int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_MEM_PARM: /* invalid MR virtual address */ + case H_MEM_ACCESS_PARM: /* invalid access controls */ + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_reg_smr() */ + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_pages = 0; + mr->num_4k = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); + mr->nr_of_pages = 0; + mr->pagearray = NULL; +} /* end ehca_mr_deletenew() */ diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h new file mode 100644 index 0000000..01899b4 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -0,0 +1,143 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +#undef DEB_PREFIX +#define DEB_PREFIX "mrmw" + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_set_pagebuf_1(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u64 *rpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +#endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c new file mode 100644 index 0000000..afcbe59 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -0,0 +1,120 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#define DEB_PREFIX "vpd " + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + extern struct ehca_module ehca_module; + struct ib_pd *mypd = NULL; + struct ehca_pd *pd = NULL; + + EDEB_EN(7, "device=%p context=%p udata=%p", device, context, udata); + + EHCA_CHECK_DEVICE_P(device); + + pd = kmem_cache_alloc(ehca_module.cache_pd, SLAB_KERNEL); + if (!pd) { + EDEB_ERR(4, "ERROR device=%p context=%p pd=%p" + " out of memory", device, context, mypd); + return ERR_PTR(-ENOMEM); + } + + memset(pd, 0, sizeof(struct ehca_pd)); + pd->ownpid = current->tgid; + + /* + * Kernel PD: when device = -1, 0 + * User PD: when context != -1 + */ + if (!context) { + /* + * Kernel PDs after init reuses always + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; + + mypd = &pd->ib_pd; + + EHCA_REGISTER_PD(device, pd); + + EDEB_EX(7, "device=%p context=%p pd=%p", device, context, mypd); + + return mypd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + extern struct ehca_module ehca_module; + int ret = 0; + u32 cur_pid = current->tgid; + struct ehca_pd *my_pd = NULL; + + EDEB_EN(7, "pd=%p", pd); + + EHCA_CHECK_PD(pd); + my_pd = container_of(pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + EHCA_DEREGISTER_PD(pd); + + kmem_cache_free(ehca_module.cache_pd, + container_of(pd, struct ehca_pd, ib_pd)); + + EDEB_EX(7, "pd=%p", pd); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h new file mode 100644 index 0000000..8707d29 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -0,0 +1,259 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote adresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_adress; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) +#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 0x10 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c new file mode 100644 index 0000000..e9434a4 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -0,0 +1,1593 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#define DEB_PREFIX "e_qp" + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ + IB_QP_MAX_QP_RD_ATOMIC | \ + IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! */ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns ehca qp state corresponding to given ib qp state + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + EDEB_ERR(4, "invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns ib qp state corresponding to given ehca qp state + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + EDEB_ERR(4,"invalid ehca_qp_state=%x",ehca_qp_state); + return -EINVAL; + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps Ib to ehca qp_type + * returns ehca qp type corresponding to ib qp type + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + EDEB_ERR(4,"Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + int index = -EINVAL; + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; + break; + case IB_QPS_INIT: + if (ib_fromstate == IB_QPS_RESET) + index = IB_QPST_RESET2INIT; + else if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2INIT; + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + if (ib_fromstate == IB_QPS_RTR) + index = IB_QPST_RTR2RTS; + else if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2RTS; + else if (ib_fromstate == IB_QPS_SQD) + index = IB_QPST_SQD2RTS; + else if (ib_fromstate == IB_QPS_SQE) + index = IB_QPST_SQE2RTS; + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; + break; + case IB_QPS_SQE: + break; + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; + break; + default: + break; + } + return index; +} + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3 +}; + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; + case IB_QPT_RAW_ETY: + return -EINVAL; + default: + EDEB_ERR(4, "Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init_qp_queues initializes/constructs r/squeue and registers queue pages. + */ +static inline int init_qp_queues(struct ipz_adapter_handle ipz_hca_handle, + struct ehca_qp *my_qp, + int nr_sq_pages, + int nr_rq_pages, + int swqe_size, + int rwqe_size, + int nr_send_sges, int nr_receive_sges) +{ + int ret = -EINVAL; + int cnt = 0; + void *vpage = NULL; + u64 rpage = 0; + int ipz_rc = -1; + u64 h_ret = H_PARAMETER; + + ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, + nr_sq_pages, + EHCA_PAGESIZE, swqe_size, nr_send_sges); + if (!ipz_rc) { + EDEB_ERR(4, "Cannot allocate page for squeue. ipz_rc=%x", + ipz_rc); + ret = -EBUSY; + return ret; + } + + ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, + nr_rq_pages, + EHCA_PAGESIZE, rwqe_size, nr_receive_sges); + if (!ipz_rc) { + EDEB_ERR(4, "Cannot allocate page for rqueue. ipz_rc=%x", + ipz_rc); + ret = -EBUSY; + goto init_qp_queues0; + } + /* register SQ pages */ + for (cnt = 0; cnt < nr_sq_pages; cnt++) { + vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue); + if (!vpage) { + EDEB_ERR(4, "SQ ipz_qpageit_get_inc() " + "failed p_vpage= %p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, 0, 0, + rpage, 1, + my_qp->galpas.kernel); + if (h_ret < H_SUCCESS) { + EDEB_ERR(4,"SQ hipz_qp_register_rpage() faield " + "rc=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + } + + ipz_qeit_reset(&my_qp->ipz_squeue); + + /* register RQ pages */ + for (cnt = 0; cnt < nr_rq_pages; cnt++) { + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (!vpage) { + EDEB_ERR(4,"RQ ipz_qpageit_get_inc() " + "failed p_vpage = %p", vpage); + h_ret = H_RESOURCE; + ret = -EINVAL; + goto init_qp_queues1; + } + + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, 0, 1, + rpage, 1,my_qp->galpas.kernel); + if (h_ret < H_SUCCESS) { + EDEB_ERR(4, "RQ hipz_qp_register_rpage() failed " + "rc=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + if (cnt == (nr_rq_pages - 1)) { /* last page! */ + if (h_ret != H_SUCCESS) { + EDEB_ERR(4,"RQ hipz_qp_register_rpage() " + "h_ret= %lx ", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (vpage) { + EDEB_ERR(4,"ipz_qpageit_get_inc() " + "should not succeed vpage=%p", + vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + EDEB_ERR(4,"RQ hipz_qp_register_rpage() " + "h_ret= %lx ", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + } + } + + ipz_qeit_reset(&my_qp->ipz_rqueue); + + return 0; + +init_qp_queues1: + ipz_queue_dtor(&my_qp->ipz_rqueue); +init_qp_queues0: + ipz_queue_dtor(&my_qp->ipz_squeue); + return ret; +} + + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + extern struct ehca_module ehca_module; + static int da_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 }; + int ret = -EINVAL; + + struct ehca_qp *my_qp = NULL; + struct ehca_pd *my_pd = NULL; + struct ehca_shca *shca = NULL; + struct ib_ucontext *context = NULL; + u64 h_ret = H_PARAMETER; + int max_send_sge; + int max_recv_sge; + + /* h_call's out parameters */ + struct ehca_alloc_qp_parms parms; + u32 qp_nr = 0, swqe_size = 0, rwqe_size = 0; + u8 daqp_completion, isdaqp; + unsigned long flags; + + EDEB_EN(7,"pd=%p init_attr=%p", pd, init_attr); + EHCA_CHECK_PD_P(pd); + EHCA_CHECK_ADR_P(init_attr); + + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && + init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { + EDEB_ERR(4, "init_attr->sg_sig_type=%x not allowed", + init_attr->sq_sig_type); + return ERR_PTR(-EINVAL); + } + + /* save daqp completion bits */ + daqp_completion = init_attr->qp_type & 0x60; + /* save daqp bit */ + isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; + init_attr->qp_type = init_attr->qp_type & 0x1F; + + if (init_attr->qp_type != IB_QPT_UD && + init_attr->qp_type != IB_QPT_SMI && + init_attr->qp_type != IB_QPT_GSI && + init_attr->qp_type != IB_QPT_UC && + init_attr->qp_type != IB_QPT_RC) { + EDEB_ERR(4,"wrong QP Type=%x",init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + if (init_attr->qp_type != IB_QPT_RC && isdaqp != 0) { + EDEB_ERR(4,"unsupported LL QP Type=%x",init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + if (pd->uobject && udata) + context = pd->uobject->context; + + my_qp = kmem_cache_alloc(ehca_module.cache_qp, SLAB_KERNEL); + if (!my_qp) { + EDEB_ERR(4, "pd=%p not enough memory to alloc qp", pd); + return ERR_PTR(-ENOMEM); + } + + memset(my_qp, 0, sizeof(struct ehca_qp)); + memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms)); + spin_lock_init(&my_qp->spinlock_s); + spin_lock_init(&my_qp->spinlock_r); + + my_pd = container_of(pd, struct ehca_pd, ib_pd); + + shca = container_of(pd->device, struct ehca_shca, ib_device); + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + + my_qp->init_attr = *init_attr; + + do { + if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + EDEB_ERR(4, "Can't reserve idr resources."); + goto create_qp_exit0; + } + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + } while (ret == -EAGAIN); + + if (ret) { + ret = -ENOMEM; + EDEB_ERR(4, "Can't allocate new idr entry."); + goto create_qp_exit0; + } + + parms.servicetype = ibqptype2servicetype(init_attr->qp_type); + if (parms.servicetype < 0) { + ret = -EINVAL; + EDEB_ERR(4, "Invalid qp_type=%x", init_attr->qp_type); + goto create_qp_exit0; + } + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + parms.sigtype = HCALL_SIGT_EVERY; + else + parms.sigtype = HCALL_SIGT_BY_WQE; + + /* UD_AV CIRCUMVENTION */ + max_send_sge = init_attr->cap.max_send_sge; + max_recv_sge = init_attr->cap.max_recv_sge; + if (IB_QPT_UD == init_attr->qp_type || + IB_QPT_GSI == init_attr->qp_type || + IB_QPT_SMI == init_attr->qp_type) { + max_send_sge += 2; + max_recv_sge += 2; + } + + EDEB(7, "isdaqp=%x daqp_completion=%x", isdaqp, daqp_completion); + + parms.ipz_eq_handle = shca->eq.ipz_eq_handle; + parms.daqp_ctrl = isdaqp | daqp_completion; + parms.pd = my_pd->fw_pd; + parms.max_recv_sge = max_recv_sge; + parms.max_send_sge = max_send_sge; + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); + + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "h_alloc_resource_qp() failed h_ret=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit1; + } + + switch (init_attr->qp_type) { + case IB_QPT_RC: + if (isdaqp == 0) { + swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ + (parms.act_nr_send_sges)]); + rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ + (parms.act_nr_recv_sges)]); + } else { /* for daqp we need to use msg size, not wqe size */ + swqe_size = da_msg_size[max_send_sge]; + rwqe_size = da_msg_size[max_recv_sge]; + parms.act_nr_send_sges = 1; + parms.act_nr_recv_sges = 1; + } + break; + case IB_QPT_UC: + swqe_size = offsetof(struct ehca_wqe, + u.nud.sg_list[parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, + u.nud.sg_list[parms.act_nr_recv_sges]); + break; + + case IB_QPT_UD: + case IB_QPT_GSI: + case IB_QPT_SMI: + /* UD circumvention */ + parms.act_nr_recv_sges -= 2; + parms.act_nr_send_sges -= 2; + swqe_size = offsetof(struct ehca_wqe, + u.ud_av.sg_list[parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, + u.ud_av.sg_list[parms.act_nr_recv_sges]); + + if (IB_QPT_GSI == init_attr->qp_type || + IB_QPT_SMI == init_attr->qp_type) { + parms.act_nr_send_wqes = init_attr->cap.max_send_wr; + parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; + parms.act_nr_send_sges = init_attr->cap.max_send_sge; + parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; + my_qp->real_qp_num = + (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; + } + + break; + + default: + break; + } + + /* initializes r/squeue and registers queue pages */ + ret = init_qp_queues(shca->ipz_hca_handle, my_qp, + parms.nr_sq_pages, parms.nr_rq_pages, + swqe_size, rwqe_size, + parms.act_nr_send_sges, parms.act_nr_recv_sges); + if (ret) { + EDEB_ERR(4,"Couldn't initialize r/squeue and pages ret=%x", + ret); + goto create_qp_exit2; + } + + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; + + my_qp->ib_qp.qp_num = my_qp->real_qp_num; + my_qp->ib_qp.qp_type = init_attr->qp_type; + + my_qp->qp_type = init_attr->qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + + init_attr->cap.max_inline_data = 0; /* not supported yet */ + init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; + init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; + init_attr->cap.max_send_sge = parms.act_nr_send_sges; + init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + + /* NOTE: define_apq0() not supported yet */ + if (init_attr->qp_type == IB_QPT_GSI) { + h_ret = ehca_define_sqp(shca, my_qp, init_attr); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "ehca_define_sqp() failed rc=%lx",h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit3; + } + } + if (init_attr->send_cq) { + struct ehca_cq *cq = container_of(init_attr->send_cq, + struct ehca_cq, ib_cq); + ret = ehca_cq_assign_qp(cq, my_qp); + if (ret) { + EDEB_ERR(4, "Couldn't assign qp to send_cq ret=%x", + ret); + goto create_qp_exit3; + } + my_qp->send_cq = cq; + } + /* copy queues, galpa data to user space */ + if (context && udata) { + struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; + struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; + struct ehca_create_qp_resp resp; + struct vm_area_struct * vma; + memset(&resp, 0, sizeof(resp)); + + resp.qp_num = my_qp->real_qp_num; + resp.token = my_qp->token; + resp.qp_type = my_qp->qp_type; + resp.qkey = my_qp->qkey; + resp.real_qp_num = my_qp->real_qp_num; + /* rqueue properties */ + resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; + resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; + resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; + resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; + resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, + ipz_rqueue->queue_length, + (void**)&resp.ipz_rqueue.queue, + &vma); + if (ret) { + EDEB_ERR(4, "Could not mmap rqueue pages"); + goto create_qp_exit3; + } + my_qp->uspace_rqueue = resp.ipz_rqueue.queue; + /* squeue properties */ + resp.ipz_squeue.qe_size = ipz_squeue->qe_size; + resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; + resp.ipz_squeue.queue_length = ipz_squeue->queue_length; + resp.ipz_squeue.pagesize = ipz_squeue->pagesize; + resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, + ipz_squeue->queue_length, + (void**)&resp.ipz_squeue.queue, + &vma); + if (ret) { + EDEB_ERR(4, "Could not mmap squeue pages"); + goto create_qp_exit4; + } + my_qp->uspace_squeue = resp.ipz_squeue.queue; + /* fw_handle */ + resp.galpas = my_qp->galpas; + ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, + (void**)&resp.galpas.kernel.fw_handle, + &vma); + if (ret) { + EDEB_ERR(4, "Could not mmap fw_handle"); + goto create_qp_exit5; + } + my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; + + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + EDEB_ERR(4, "Copy to udata failed"); + ret = -EINVAL; + goto create_qp_exit6; + } + } + + EDEB_EX(7, "ehca_qp=%p qp_num=%x, token=%x", + my_qp, qp_nr, my_qp->token); + return &my_qp->ib_qp; + +create_qp_exit6: + ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); + +create_qp_exit5: + ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); + +create_qp_exit4: + ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); + +create_qp_exit3: + ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(&my_qp->ipz_squeue); + +create_qp_exit2: + hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + +create_qp_exit1: + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + +create_qp_exit0: + kmem_cache_free(ehca_module.cache_qp, my_qp); + EDEB_EX(4, "failed ret=%x", ret); + return ERR_PTR(ret); + +} + +/* + * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts + * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe + * returns total number of bad wqes in bad_wqe_cnt + */ +static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, + int *bad_wqe_cnt) +{ + int ret = 0; + u64 h_ret = H_SUCCESS; + struct ipz_queue *squeue = NULL; + void *bad_send_wqe_p = NULL; + void *bad_send_wqe_v = NULL; + void *squeue_start_p = NULL; + void *squeue_end_p = NULL; + void *squeue_start_v = NULL; + void *squeue_end_v = NULL; + struct ehca_wqe *wqe = NULL; + int qp_num = my_qp->ib_qp.qp_num; + + EDEB_EN(7, "ehca_qp=%p qp_num=%x ", my_qp, qp_num); + + /* get send wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &bad_send_wqe_p, NULL, 2); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_h_disable_and_get_wqe() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx",my_qp, qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto prepare_sqe_rts_exit1; + } + bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + EDEB(7, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); + /* convert wqe pointer to vadr */ + bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + EDEB_DMP(6, bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); + squeue = &my_qp->ipz_squeue; + squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L)); + squeue_end_p = squeue_start_p+squeue->queue_length; + squeue_start_v = abs_to_virt((u64)squeue_start_p); + squeue_end_v = abs_to_virt((u64)squeue_end_p); + EDEB(6, "qp_num=%x squeue_start_v=%p squeue_end_v=%p", + qp_num, squeue_start_v, squeue_end_v); + + /* loop sets wqe's purge bit */ + wqe = (struct ehca_wqe*)bad_send_wqe_v; + *bad_wqe_cnt = 0; + while (wqe->optype != 0xff && wqe->wqef != 0xff) { + EDEB_DMP(6, wqe, 32, "qp_num=%x wqe", qp_num); + wqe->nr_of_data_seg = 0; /* suppress data access */ + wqe->wqef = WQEF_PURGE; /* WQE to be purged */ + wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size); + *bad_wqe_cnt = (*bad_wqe_cnt)+1; + if ((void*)wqe >= squeue_end_v) { + wqe = squeue_start_v; + } + } + /* + * bad wqe will be reprocessed and ignored when pol_cq() is called, + * i.e. nr of wqes with flush error status is one less + */ + EDEB(6, "qp_num=%x flusherr_wqe_cnt=%x", qp_num, (*bad_wqe_cnt)-1); + wqe->wqef = 0; + +prepare_sqe_rts_exit1: + + EDEB_EX(7, "ehca_qp=%p qp_num=%x ret=%x", my_qp, qp_num, ret); + return ret; +} + +/* + * internal_modify_qp with circumvention to handle aqp0 properly + * smi_reset2init indicates if this is an internal reset-to-init-call for + * smi. This flag must always be zero if called from ehca_modify_qp()! + * This internal func was intorduced to avoid recursion of ehca_modify_qp()! + */ +static int internal_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, int smi_reset2init) +{ + enum ib_qp_state qp_cur_state = 0, qp_new_state = 0; + int cnt = 0, qp_attr_idx = 0, ret = 0; + + enum ib_qp_statetrans statetrans; + struct hcp_modify_qp_control_block *mqpcb = NULL; + struct ehca_qp *my_qp = NULL; + struct ehca_shca *shca = NULL; + u64 update_mask = 0; + u64 h_ret = H_SUCCESS; + int bad_wqe_cnt = 0; + int squeue_locked = 0; + unsigned long spl_flags = 0; + + my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + shca = container_of(ibqp->pd->device, struct ehca_shca, ib_device); + + EDEB_EN(7, "ehca_qp=%p qp_num=%x ibqp_type=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, ibqp->qp_type, + attr->qp_state, attr_mask); + + /* do query_qp to obtain current attr values */ + mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (mqpcb == NULL) { + ret = -ENOMEM; + EDEB_ERR(4, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); + goto modify_qp_exit0; + } + + h_ret = hipz_h_query_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + mqpcb, my_qp->galpas.kernel); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, ibqp->qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto modify_qp_exit1; + } + EDEB(7, "ehca_qp=%p qp_num=%x ehca_qp_state=%x", + my_qp, ibqp->qp_num, mqpcb->qp_state); + + qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); + + if (qp_cur_state == -EINVAL) { /* invalid qp state */ + ret = -EINVAL; + EDEB_ERR(4, "Invalid current ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + mqpcb->qp_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + /* + * circumvention to set aqp0 initial state to init + * as expected by IB spec + */ + if (smi_reset2init == 0 && + ibqp->qp_type == IB_QPT_SMI && + qp_cur_state == IB_QPS_RESET && + (attr_mask & IB_QP_STATE) && + attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ + struct ib_qp_attr smiqp_attr = { + .qp_state = IB_QPS_INIT, + .port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + EDEB_ERR(4, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%x", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + EDEB(7, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + EDEB_ERR(4, "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + EDEB(7, "ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask)) { + ret = -EINVAL; + EDEB_ERR(4, "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", + qp_new_state, qp_cur_state, my_qp, ibqp->qp_num, + attr_mask); + goto modify_qp_exit1; + } + + if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + EDEB_ERR(4, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + EDEB_ERR(4, " qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x " + "ehca_qp=%p qp_num=%x", + qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + EDEB_ERR(4, "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + EDEB(7, "ehca_qp=%p qp_num=%x qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* sqe -> rts: set purge bit of bad wqe before actual trans */ + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* mark next free wqe if kernel */ + if (my_qp->uspace_squeue == 0) { + struct ehca_wqe *wqe = NULL; + /* lock send queue */ + spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + squeue_locked = 1; + /* mark next free wqe */ + wqe = (struct ehca_wqe*) + ipz_qeit_get(&my_qp->ipz_squeue); + wqe->optype = wqe->wqef = 0xff; + EDEB(7, "qp_num=%x next_free_wqe=%p", + ibqp->qp_num, wqe); + } + ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); + if (ret) { + EDEB_ERR(4, "prepare_sqe_rts() failed " + "ehca_qp=%p qp_num=%x ret=%x", + my_qp, ibqp->qp_num, ret); + goto modify_qp_exit2; + } + } + + /* + * enable RDMA_Atomic_Control if reset->init und reliable con + * this is necessary since gen2 does not provide that flag, + * but pHyp requires it + */ + if (statetrans == IB_QPST_RESET2INIT && + (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { + mqpcb->rdma_atomic_ctrl = 3; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); + } + /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ + if (statetrans == IB_QPST_INIT2RTR && + (ibqp->qp_type == IB_QPT_UC) && + !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { + mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + mqpcb->prim_p_key_idx = attr->pkey_index; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); + EDEB(7, "ehca_qp=%p qp_num=%x " + "IB_QP_PKEY_INDEX update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_PORT) { + if (attr->port_num < 1 || attr->port_num > shca->num_ports) { + ret = -EINVAL; + EDEB_ERR(4, "Invalid port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_PORT update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_QKEY update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_AV) { + int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); + int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> + init_attr.port_num].rate); + + mqpcb->dlid = attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ah_mult < ehca_mult) + mqpcb->max_static_rate = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + else + mqpcb->max_static_rate = 0; + + EDEB(7, " ipd=mqpcb->max_static_rate set %x " + " ah_mult=%x ehca_mult=%x " + " attr->ah_attr.static_rate=%x", + mqpcb->max_static_rate,ah_mult,ehca_mult, + attr->ah_attr.static_rate); + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1 << 31; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_AV update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + + if (attr_mask & IB_QP_PATH_MTU) { + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_PATH_MTU update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_TIMEOUT update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_RETRY_CNT update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_RNR_RETRY update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_RQ_PSN update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? + attr->max_dest_rd_atomic : 2; /* max is 2 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_MAX_DEST_RD_ATOMIC " + "update_mask=%lx", my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic; + update_mask |= + EHCA_BMASK_SET + (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_MAX_QP_RD_ATOMIC " + "update_mask=%lx", my_qp, ibqp->qp_num, update_mask); + } + if (attr_mask & IB_QP_ALT_PATH) { + int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); + int ehca_mult = ib_rate_to_mult( + shca->sport[my_qp->init_attr.port_num].rate); + + mqpcb->dlid_al = attr->alt_ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1); + mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1); + mqpcb->service_level_al = attr->alt_ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1); + + if (ah_mult < ehca_mult) + mqpcb->max_static_rate = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + else + mqpcb->max_static_rate_al = 0; + + EDEB(7, " ipd=mqpcb->max_static_rate set %x," + " ah_mult=%x ehca_mult=%x", + mqpcb->max_static_rate,ah_mult,ehca_mult); + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1 << 31; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1); + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1); + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1); + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + + EDEB(7, "ehca_qp=%p qp_num=%x IB_QP_ALT_PATH update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + EDEB(7, "ehca_qp=%p qp_num=%x " + "IB_QP_MIN_RNR_TIMER update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + EDEB(7, "ehca_qp=%p qp_num=%x " + "IB_QP_SQ_PSN update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + EDEB(7, "ehca_qp=%p qp_num=%x " + "IB_QP_DEST_QPN update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + mqpcb->path_migration_state = attr->path_mig_state; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + EDEB(7, "ehca_qp=%p qp_num=%x " + "IB_QP_PATH_MIG_STATE update_mask=%lx", my_qp, + ibqp->qp_num, update_mask); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + EDEB(7, "ehca_qp=%p qp_num=%x " + "IB_QP_CAP update_mask=%lx", + my_qp, ibqp->qp_num, update_mask); + /* no support for max_send/recv_sge yet */ + } + + EDEB_DMP(7, mqpcb, 4*70, "ehca_qp=%p qp_num=%x", my_qp, ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + EDEB_ERR(4, "hipz_h_modify_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + EDEB(6, "doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + EDEB(7, "ehca_qp=%p qp_num=%x " + "RESET_2_INIT needs an additional enable " + "-> update_mask=%lx", my_qp, ibqp->qp_num, update_mask); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + EDEB_ERR(4, "ENABLE in context of " + "RESET_2_INIT failed! " + "Maybe you didn't get a LID" + "h_ret=%lx ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + } + + if (statetrans == IB_QPST_ANY2RESET) { + ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(&my_qp->ipz_squeue); + } + + if (attr_mask & IB_QP_QKEY) + my_qp->qkey = attr->qkey; + +modify_qp_exit2: + if (squeue_locked) { /* this means: sqe -> rts */ + spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + my_qp->sqerr_purgeflag = 1; + } + +modify_qp_exit1: + kfree(mqpcb); + +modify_qp_exit0: + EDEB_EX(7, "ehca_qp=%p qp_num=%x ibqp_type=%x ret=%x", + my_qp, ibqp->qp_num, ibqp->qp_type, ret); + return ret; +} + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) +{ + int ret = 0; + struct ehca_qp *my_qp = NULL; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + + EHCA_CHECK_ADR(ibqp); + EHCA_CHECK_ADR(attr); + EHCA_CHECK_ADR(ibqp->device); + + my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + + EDEB_EN(7, "ehca_qp=%p qp_num=%x ibqp_type=%x attr_mask=%x", + my_qp, ibqp->qp_num, ibqp->qp_type, attr_mask); + + my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + } else + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + + EDEB_EX(7, "ehca_qp=%p qp_num=%x ibqp_type=%x ret=%x", + my_qp, ibqp->qp_num, ibqp->qp_type, ret); + return ret; +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = NULL; + struct ehca_shca *shca = NULL; + struct hcp_modify_qp_control_block *qpcb = NULL; + struct ipz_adapter_handle adapter_handle; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + int cnt = 0, ret = 0; + u64 h_ret = H_SUCCESS; + + EHCA_CHECK_ADR(qp); + EHCA_CHECK_ADR(qp_attr); + EHCA_CHECK_DEVICE(qp->device); + + my_qp = container_of(qp, struct ehca_qp, ib_qp); + + EDEB_EN(7, "ehca_qp=%p qp_num=%x " + "qp_attr=%p qp_attr_mask=%x qp_init_attr=%p", + my_qp, qp->qp_num, qp_attr, qp_attr_mask, qp_init_attr); + + my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto query_qp_exit0; + } + + shca = container_of(qp->device, struct ehca_shca, ib_device); + adapter_handle = shca->ipz_hca_handle; + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ret = -EINVAL; + EDEB_ERR(4,"Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + goto query_qp_exit0; + } + + qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL ); + if (!qpcb) { + ret = -ENOMEM; + EDEB_ERR(4,"Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + goto query_qp_exit0; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + EDEB_ERR(4,"hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + EDEB_ERR(4,"Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = + EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); + + qp_attr->port_num = + EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); + + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = + EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); + + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + EDEB(7, "ehca_qp=%p qp_number=%x dest_qp_number=%x " + "dlid=%x path_mtu=%x dest_gid=%lx_%lx " + "service_level=%x qp_state=%x", + my_qp, qpcb->qp_number, qpcb->dest_qp_nr, + qpcb->dlid, qpcb->path_mtu, + qpcb->dest_gid.dw[0], qpcb->dest_gid.dw[1], + qpcb->service_level, qpcb->qp_state); + + EDEB_DMP(7, qpcb, 4*70, "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + +query_qp_exit1: + kfree(qpcb); + +query_qp_exit0: + EDEB_EX(7, "ehca_qp=%p qp_num=%x ret=%x", + my_qp, qp->qp_num, ret); + return ret; +} + +int ehca_destroy_qp(struct ib_qp *ibqp) +{ + extern struct ehca_module ehca_module; + struct ehca_qp *my_qp = NULL; + struct ehca_shca *shca = NULL; + struct ehca_pfqp *qp_pf = NULL; + struct ehca_pd *my_pd = NULL; + u32 cur_pid = current->tgid; + u32 qp_num = 0; + int ret = 0; + u64 h_ret = H_SUCCESS; + u8 port_num = 0; + enum ib_qp_type qp_type; + unsigned long flags; + + EHCA_CHECK_ADR(ibqp); + + my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + qp_num = ibqp->qp_num; + qp_pf = &my_qp->pf; + + shca = container_of(ibqp->device, struct ehca_shca, ib_device); + + EDEB_EN(7, "ehca_qp=%p qp_num=%x", my_qp, ibqp->qp_num); + + my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, + my_qp->real_qp_num); + if (ret) { + EDEB_ERR(4, "Couldn't unassign qp from send_cq " + "ret=%x qp_num=%x cq_num=%x", + ret, my_qp->ib_qp.qp_num, + my_qp->send_cq->cq_number); + goto destroy_qp_exit0; + } + } + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* un-mmap if vma alloc */ + if (my_qp->uspace_rqueue) { + ret = ehca_munmap(my_qp->uspace_rqueue, + my_qp->ipz_rqueue.queue_length); + ret = ehca_munmap(my_qp->uspace_squeue, + my_qp->ipz_squeue.queue_length); + ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); + } + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + EDEB_ERR(4, "hipz_h_destroy_qp() failed " + "rc=%lx ehca_qp=%p qp_num=%x", + h_ret, qp_pf, qp_num); + goto destroy_qp_exit0; + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + + EDEB(4, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(&my_qp->ipz_squeue); + kmem_cache_free(ehca_module.cache_qp, my_qp); + +destroy_qp_exit0: + ret = ehca2ib_return_code(h_ret); + EDEB_EX(7,"ret=%x", ret); + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c new file mode 100644 index 0000000..908479e --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -0,0 +1,689 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#define DEB_PREFIX "reqs" + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + EDEB_ERR(4, "Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (IS_EDEB_ON(7)) { + EDEB(7, "RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + EDEB_DMP(7, wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx = 0; + int j = 0; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + EDEB(4, "send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x",idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, send_wr->opcode); + if (mad_hdr) { + EDEB(4, "send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx attr_id=%x " + "resv=%x attr_mod=%x", + idx, mad_hdr->base_version, mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = (u8 *) abs_to_virt(sge->addr); + EDEB(4, "send_wr#%x sge#%x addr=%p length=%x lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + EDEB_DMP(4, data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + EDEB_ERR(4, "Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = send_wr->wr_id; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + EDEB_ERR(4, "Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if (send_wr->send_flags & IB_SEND_SIGNALED) + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch (qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (!send_wr->wr.ud.ah) { + EDEB_ERR(4, "wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_adress = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + break; + + default: + EDEB_ERR(4, "Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (IS_EDEB_ON(7)) { + EDEB(7, "SEND WQE written into queue qp=%p ", qp); + EDEB_DMP(7, wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! + */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = NULL; + struct ib_send_wr *cur_send_wr = NULL; + struct ehca_wqe *wqe_p = NULL; + int wqe_cnt = 0; + int ret = 0; + unsigned long spl_flags = 0; + + EHCA_CHECK_ADR(qp); + my_qp = container_of(qp, struct ehca_qp, ib_qp); + EHCA_CHECK_QP(my_qp); + EHCA_CHECK_ADR(send_wr); + EDEB_EN(7, "ehca_qp=%p qp_num=%x send_wr=%p bad_send_wr=%p", + my_qp, qp->qp_num, send_wr, bad_send_wr); + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + + /* loop processes list of send reqs */ + for (cur_send_wr = send_wr; cur_send_wr != NULL; + cur_send_wr = cur_send_wr->next) { + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + if (wqe_cnt == 0) { + ret = -ENOMEM; + EDEB_ERR(4, "Too many posted WQEs qp_num=%x", + qp->qp_num); + } + goto post_send_exit0; + } + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + *bad_send_wr = cur_send_wr; + if (wqe_cnt == 0) { + ret = -EINVAL; + EDEB_ERR(4, "Could not write WQE qp_num=%x", + qp->qp_num); + } + goto post_send_exit0; + } + wqe_cnt++; + EDEB(7, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); + } /* eof for cur_send_wr */ + +post_send_exit0: + /* UNLOCK the QUEUE */ + spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, wqe_cnt); + EDEB_EX(7, "ehca_qp=%p qp_num=%x ret=%x wqe_cnt=%d", + my_qp, qp->qp_num, ret, wqe_cnt); + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = NULL; + struct ib_recv_wr *cur_recv_wr = NULL; + struct ehca_wqe *wqe_p = NULL; + int wqe_cnt = 0; + int ret = 0; + unsigned long spl_flags = 0; + + EHCA_CHECK_ADR(qp); + my_qp = container_of(qp, struct ehca_qp, ib_qp); + EHCA_CHECK_QP(my_qp); + EHCA_CHECK_ADR(recv_wr); + EDEB_EN(7, "ehca_qp=%p qp_num=%x recv_wr=%p bad_recv_wr=%p", + my_qp, qp->qp_num, recv_wr, bad_recv_wr); + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + + /* loop processes list of send reqs */ + for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; + cur_recv_wr = cur_recv_wr->next) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_recv_wr) + *bad_recv_wr = cur_recv_wr; + if (wqe_cnt == 0) { + ret = -ENOMEM; + EDEB_ERR(4, "Too many posted WQEs qp_num=%x", + qp->qp_num); + } + goto post_recv_exit0; + } + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, + cur_recv_wr); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + *bad_recv_wr = cur_recv_wr; + if (wqe_cnt == 0) { + ret = -EINVAL; + EDEB_ERR(4, "Could not write WQE qp_num=%x", + qp->qp_num); + } + goto post_recv_exit0; + } + wqe_cnt++; + EDEB(7, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); + } /* eof for cur_recv_wr */ + +post_recv_exit0: + spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + EDEB_EX(7, "ehca_qp=%p qp_num=%x ret=%x wqe_cnt=%d", + my_qp, qp->qp_num, ret, wqe_cnt); + return ret; +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! + */ +static const u8 ib_wc_opcode[255] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe = NULL; + int cqe_count = 0; + + EDEB_EN(7, "ehca_cq=%p cq_num=%x wc=%p", my_cq, my_cq->cq_number, wc); + +poll_cq_one_read_cqe: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + EDEB(7, "Completion queue is empty ehca_cq=%p cq_num=%x " + "ret=%x", my_cq, my_cq->cq_number, ret); + goto poll_cq_one_exit0; + } + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + int purgeflag = 0; + unsigned long spl_flags = 0; + if (!qp) { + EDEB_ERR(4, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + EDEB_DMP(4, cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto poll_cq_one_read_cqe; + } + spin_lock_irqsave(&qp->spinlock_s, spl_flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + + if (purgeflag) { + EDEB(6, "Got CQE with purged bit qp_num=%x src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + EDEB_DMP(6, cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto poll_cq_one_read_cqe; + } + } + + /* tracing cqe */ + if (IS_EDEB_ON(7)) { + EDEB(7, "Received COMPLETION ehca_cq=%p cq_num=%x -----", + my_cq, my_cq->cq_number); + EDEB_DMP(7, cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + EDEB(7, "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + /* we got a completion! */ + wc->wr_id = cqe->work_request_id; + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + EDEB_ERR(4, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + EDEB_DMP(4, cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto poll_cq_one_exit0; + } + /* eval ib_wc_status */ + if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->qp_num = cqe->local_qp_number; + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + wc->wc_flags = cqe->w_completion_flags; + wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + + if (wc->status != IB_WC_SUCCESS) + EDEB(6, "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " + "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx cqe=%p", + my_cq, my_cq->cq_number, cqe->optype, cqe->status, + cqe->local_qp_number, cqe->remote_qp_number, + cqe->work_request_id, cqe); + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + EDEB_EX(7, "ret=%x ehca_cq=%p cq_number=%x wc=%p " + "status=%x opcode=%x qp_num=%x byte_len=%x", + ret, my_cq, my_cq->cq_number, wc, wc->status, + wc->opcode, wc->qp_num, wc->byte_len); + + return ret; +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = NULL; + int nr = 0; + struct ib_wc *current_wc = NULL; + int ret = 0; + unsigned long spl_flags = 0; + + EHCA_CHECK_CQ(cq); + EHCA_CHECK_ADR(wc); + + my_cq = container_of(cq, struct ehca_cq, ib_cq); + EHCA_CHECK_CQ(my_cq); + + EDEB_EN(7, "ehca_cq=%p cq_num=%x num_entries=%d wc=%p", + my_cq, my_cq->cq_number, num_entries, wc); + + if (num_entries < 1) { + EDEB_ERR(4, "Invalid num_entries=%d ehca_cq=%p cq_num=%x", + num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + current_wc = wc; + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + for (nr = 0; nr < num_entries; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + if (ret == -EAGAIN || !ret) + ret = nr; + +poll_cq_exit0: + EDEB_EX(7, "ehca_cq=%p cq_num=%x ret=%x wc=%p nr_entries=%d", + my_cq, my_cq->cq_number, ret, wc, nr); + + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) +{ + struct ehca_cq *my_cq = NULL; + int ret = 0; + + EHCA_CHECK_CQ(cq); + my_cq = container_of(cq, struct ehca_cq, ib_cq); + EHCA_CHECK_CQ(my_cq); + EDEB_EN(7, "ehca_cq=%p cq_num=%x cq_notif=%x", + my_cq, my_cq->cq_number, cq_notify); + + switch (cq_notify) { + case IB_CQ_SOLICITED: + hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + EDEB_EX(7, "ehca_cq=%p cq_num=%x ret=%x", + my_cq, my_cq->cq_number, ret); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c new file mode 100644 index 0000000..d2c5552 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -0,0 +1,123 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#define DEB_PREFIX "e_qp" + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + + +extern int ehca_create_aqp1(struct ehca_shca *shca, struct ehca_sport *sport); +extern int ehca_destroy_aqp1(struct ehca_sport *sport); + +extern int ehca_port_act_time; + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + + u32 pma_qp_nr = 0; + u32 bma_qp_nr = 0; + u64 ret = H_SUCCESS; + u8 port = qp_init_attr->port_num; + int counter = 0; + + EDEB_EN(7, "port=%x qp_type=%x", + port, qp_init_attr->qp_type); + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + EDEB_ERR(4, "Can't define AQP1 for port %x. rc=%lx", + port, ret); + goto ehca_define_aqp1; + } + break; + default: + ret = H_PARAMETER; + goto ehca_define_aqp1; + } + + while ((shca->sport[port - 1].port_state != IB_PORT_ACTIVE) && + (counter < ehca_port_act_time)) { + EDEB(6, "... wait until port %x is active", + port); + msleep_interruptible(1000); + counter++; + } + + if (counter == ehca_port_act_time) { + EDEB_ERR(4, "Port %x is not active.", port); + ret = H_HARDWARE; + } + +ehca_define_aqp1: + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h new file mode 100644 index 0000000..930e46f --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -0,0 +1,415 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Khadija Souissi + * Waleri Fomin + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define EHCA_EDEB_TRACE_MASK_SIZE 32 +extern u8 ehca_edeb_mask[EHCA_EDEB_TRACE_MASK_SIZE]; +#define EDEB_ID_TO_U32(str4) (str4[3] | (str4[2] << 8) | (str4[1] << 16) | \ + (str4[0] << 24)) + +static inline u64 ehca_edeb_filter(const u32 level, + const u32 id, const u32 line) +{ + u64 ret = 0; + u32 filenr = 0; + u32 filter_level = 9; + u32 dynamic_level = 0; + + /* + * This is code written for the gcc -O2 optimizer + * which should collapse to two single ints. + * Filter_level is the first level kicked out by + * compiler and means trace everything below 6. + */ + + if (id == EDEB_ID_TO_U32("ehav")) { + filenr = 0x01; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("clas")) { + filenr = 0x02; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("cqeq")) { + filenr = 0x03; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("shca")) { + filenr = 0x05; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("eirq")) { + filenr = 0x06; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("lMad")) { + filenr = 0x07; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("mcas")) { + filenr = 0x08; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("mrmw")) { + filenr = 0x09; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("vpd ")) { + filenr = 0x0a; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("e_qp")) { + filenr = 0x0b; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("uqes")) { + filenr = 0x0c; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("PHYP")) { + filenr = 0x0d; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("hcpi")) { + filenr = 0x0e; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("iptz")) { + filenr = 0x0f; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("spta")) { + filenr = 0x10; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("simp")) { + filenr = 0x11; + filter_level = 8; + } + if (id == EDEB_ID_TO_U32("reqs")) { + filenr = 0x12; + filter_level = 8; + } + + if ((filenr - 1) > sizeof(ehca_edeb_mask)) { + filenr = 0; + } + + if (filenr == 0) { + filter_level = 9; + } /* default */ + ret = filenr * 0x10000 + line; + if (filter_level <= level) { + return ret | 0x100000000L; /* this is the flag to not trace */ + } + dynamic_level = ehca_edeb_mask[filenr]; + if (likely(dynamic_level <= level)) { + ret = ret | 0x100000000L; + }; + return ret; +} + +#ifdef EHCA_USE_HCALL_KERNEL +#ifdef CONFIG_PPC_PSERIES + +#include + +/* + * IS_EDEB_ON - Checks if debug is on for the given level. + */ +#define IS_EDEB_ON(level) \ +((ehca_edeb_filter(level, EDEB_ID_TO_U32(DEB_PREFIX), __LINE__) & \ + 0x100000000L) == 0) + +#define EDEB_P_GENERIC(level,idstring,format,args...) \ +do { \ + u64 ehca_edeb_filterresult = \ + ehca_edeb_filter(level, EDEB_ID_TO_U32(DEB_PREFIX), __LINE__);\ + if ((ehca_edeb_filterresult & 0x100000000L) == 0) \ + printk("PU%04x %08x:%s " idstring " "format "\n", \ + get_paca()->paca_index, (u32)(ehca_edeb_filterresult), \ + __func__, ##args); \ +} while (1 == 0) + +#elif REAL_HCALL + +#define EDEB_P_GENERIC(level,idstring,format,args...) \ +do { \ + u64 ehca_edeb_filterresult = \ + ehca_edeb_filter(level, EDEB_ID_TO_U32(DEB_PREFIX), __LINE__); \ + if ((ehca_edeb_filterresult & 0x100000000L) == 0) \ + printk("%08x:%s " idstring " "format "\n", \ + (u32)(ehca_edeb_filterresult), \ + __func__, ##args); \ +} while (1 == 0) + +#endif +#else + +#define IS_EDEB_ON(level) (1) + +#define EDEB_P_GENERIC(level,idstring,format,args...) \ +do { \ + printk("%s " idstring " "format "\n", \ + __func__, ##args); \ +} while (1 == 0) + +#endif + +/** + * EDEB - Trace output macro. + * @level: tracelevel + * @format: optional format string, use "" if not desired + * @args: printf like arguments for trace + */ +#define EDEB(level,format,args...) \ + EDEB_P_GENERIC(level,"",format,##args) +#define EDEB_ERR(level,format,args...) \ + EDEB_P_GENERIC(level,"HCAD_ERROR ",format,##args) +#define EDEB_EN(level,format,args...) \ + EDEB_P_GENERIC(level,">>>",format,##args) +#define EDEB_EX(level,format,args...) \ + EDEB_P_GENERIC(level,"<<<",format,##args) + +/** + * EDEB_DMP - macro to dump a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define EDEB_DMP(level,adr,len,format,args...) \ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char*)(adr); \ + for (x = 0; x < l; x += 16) { \ + EDEB(level, format " adr=%p ofs=%04x %016lx %016lx", \ + ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) + +/* define a bitmask, the ibm way... */ +#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask,value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<>EHCA_BMASK_SHIFTPOS(mask))) + +#define PARANOIA_MODE +#ifdef PARANOIA_MODE + +#define EHCA_CHECK_ADR_P(adr) \ + if (unlikely(adr == 0)) { \ + EDEB_ERR(4, "adr=%p check failed line %i", adr, \ + __LINE__); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_ADR(adr) \ + if (unlikely(adr == 0)) { \ + EDEB_ERR(4, "adr=%p check failed line %i", adr, \ + __LINE__); \ + return -EFAULT; } + +#define EHCA_CHECK_DEVICE_P(device) \ + if (unlikely(device == 0)) { \ + EDEB_ERR(4, "device=%p check failed", device); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_DEVICE(device) \ + if (unlikely(device == 0)) { \ + EDEB_ERR(4, "device=%p check failed", device); \ + return -EFAULT; } + +#define EHCA_CHECK_PD(pd) \ + if (unlikely(pd == 0)) { \ + EDEB_ERR(4, "pd=%p check failed", pd); \ + return -EFAULT; } + +#define EHCA_CHECK_PD_P(pd) \ + if (unlikely(pd == 0)) { \ + EDEB_ERR(4, "pd=%p check failed", pd); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_AV(av) \ + if (unlikely(av == 0)) { \ + EDEB_ERR(4, "av=%p check failed", av); \ + return -EFAULT; } + +#define EHCA_CHECK_AV_P(av) \ + if (unlikely(av == 0)) { \ + EDEB_ERR(4, "av=%p check failed", av); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_CQ(cq) \ + if (unlikely(cq == 0)) { \ + EDEB_ERR(4, "cq=%p check failed", cq); \ + return -EFAULT; } + +#define EHCA_CHECK_CQ_P(cq) \ + if (unlikely(cq == 0)) { \ + EDEB_ERR(4, "cq=%p check failed", cq); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_EQ(eq) \ + if (unlikely(eq == 0)) { \ + EDEB_ERR(4, "eq=%p check failed", eq); \ + return -EFAULT; } + +#define EHCA_CHECK_EQ_P(eq) \ + if (unlikely(eq == 0)) { \ + EDEB_ERR(4, "eq=%p check failed", eq); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_QP(qp) \ + if (unlikely(qp == 0)) { \ + EDEB_ERR(4, "qp=%p check failed", qp); \ + return -EFAULT; } + +#define EHCA_CHECK_QP_P(qp) \ + if (unlikely(qp == 0)) { \ + EDEB_ERR(4, "qp=%p check failed", qp); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_MR(mr) \ + if (unlikely(mr == 0)) { \ + EDEB_ERR(4, "mr=%p check failed", mr); \ + return -EFAULT; } + +#define EHCA_CHECK_MR_P(mr) \ + if (unlikely(mr == 0)) { \ + EDEB_ERR(4, "mr=%p check failed", mr); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_MW(mw) \ + if (unlikely(mw == 0)) { \ + EDEB_ERR(4, "mw=%p check failed", mw); \ + return -EFAULT; } + +#define EHCA_CHECK_MW_P(mw) \ + if (unlikely(mw == 0)) { \ + EDEB_ERR(4, "mw=%p check failed", mw); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_CHECK_FMR(fmr) \ + if (unlikely(fmr == 0)) { \ + EDEB_ERR(4, "fmr=%p check failed", fmr); \ + return -EFAULT; } + +#define EHCA_CHECK_FMR_P(fmr) \ + if (unlikely(fmr == 0)) { \ + EDEB_ERR(4, "fmr=%p check failed", fmr); \ + return ERR_PTR(-EFAULT); } + +#define EHCA_REGISTER_PD(device,pd) +#define EHCA_REGISTER_AV(pd,av) +#define EHCA_DEREGISTER_PD(PD) +#define EHCA_DEREGISTER_AV(av) +#else +#define EHCA_CHECK_DEVICE_P(device) + +#define EHCA_CHECK_PD(pd) +#define EHCA_REGISTER_PD(device,pd) +#define EHCA_DEREGISTER_PD(PD) +#endif + +static inline int ehca_adr_bad(void *adr) +{ + return !adr; +} + +/* Converts ehca to ib return code */ +static inline int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_BUSY: + return -EBUSY; + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c new file mode 100644 index 0000000..c148c23 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -0,0 +1,400 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#undef DEB_PREFIX +#define DEB_PREFIX "uver" + +#include + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context = NULL; + + EHCA_CHECK_ADR_P(device); + EDEB_EN(7, "device=%p name=%s", device, device->name); + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + EDEB_ERR(4, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + EDEB_EX(7, "device=%p ucontext=%p", device, my_context); + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + struct ehca_ucontext *my_context = NULL; + EHCA_CHECK_ADR(context); + EDEB_EN(7, "ucontext=%p", context); + my_context = container_of(context, struct ehca_ucontext, ib_ucontext); + kfree(my_context); + EDEB_EN(7, "ucontext=%p", context); + return 0; +} + +struct page *ehca_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct page *mypage = NULL; + u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; + u32 idr_handle = fileoffset >> 32; + u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ + u32 cur_pid = current->tgid; + unsigned long flags; + + EDEB_EN(7, "vm_start=%lx vm_end=%lx vm_page_prot=%lx vm_fileoff=%lx " + "address=%lx", + vma->vm_start, vma->vm_end, vma->vm_page_prot, fileoffset, + address); + + if (q_type == 1) { /* CQ */ + struct ehca_cq *cq = NULL; + u64 offset; + void *vaddr = NULL; + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, idr_handle); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (cq->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, cq->ownpid); + return NOPAGE_SIGBUS; + } + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) { + EDEB_ERR(4, "cq is NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + if (rsrc_type == 2) { + EDEB(6, "cq=%p cq queuearea", cq); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); + EDEB(6, "offset=%lx vaddr=%p", offset, vaddr); + mypage = virt_to_page(vaddr); + } + } else if (q_type == 2) { /* QP */ + struct ehca_qp *qp = NULL; + struct ehca_pd *pd = NULL; + u64 offset; + void *vaddr = NULL; + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, idr_handle); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + + pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, pd->ownpid); + return NOPAGE_SIGBUS; + } + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) { + EDEB_ERR(4, "qp is NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + if (rsrc_type == 2) { /* rqueue */ + EDEB(6, "qp=%p qp rqueuearea", qp); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); + EDEB(6, "offset=%lx vaddr=%p", offset, vaddr); + mypage = virt_to_page(vaddr); + } else if (rsrc_type == 3) { /* squeue */ + EDEB(6, "qp=%p qp squeuearea", qp); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); + EDEB(6, "offset=%lx vaddr=%p", offset, vaddr); + mypage = virt_to_page(vaddr); + } + } + + if (!mypage) { + EDEB_ERR(4, "Invalid page adr==NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + get_page(mypage); + EDEB_EX(7, "page adr=%p", mypage); + return mypage; +} + +static struct vm_operations_struct ehcau_vm_ops = { + .nopage = ehca_nopage, +}; + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; + u32 idr_handle = fileoffset >> 32; + u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ + u32 ret = -EFAULT; /* assume the worst */ + u64 vsize = 0; /* must be calculated/set below */ + u64 physical = 0; /* must be calculated/set below */ + u32 cur_pid = current->tgid; + unsigned long flags; + + EDEB_EN(7, "vm_start=%lx vm_end=%lx vm_page_prot=%lx vm_fileoff=%lx", + vma->vm_start, vma->vm_end, vma->vm_page_prot, fileoffset); + + if (q_type == 1) { /* CQ */ + struct ehca_cq *cq; + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, idr_handle); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (cq->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, cq->ownpid); + return -ENOMEM; + } + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) + return -EINVAL; + if (!cq->ib_cq.uobject) + return -EINVAL; + if (cq->ib_cq.uobject->context != context) + return -EINVAL; + if (rsrc_type == 1) { /* galpa fw handle */ + EDEB(6, "cq=%p cq triggerarea", cq); + vma->vm_flags |= VM_RESERVED; + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + EDEB_ERR(4, "invalid vsize=%lx", + vma->vm_end - vma->vm_start); + ret = -EINVAL; + goto mmap_exit0; + } + + physical = cq->galpas.user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_RESERVED; + + EDEB(6, "vsize=%lx physical=%lx", vsize, physical); + ret = remap_pfn_range(vma, vma->vm_start, + physical >> PAGE_SHIFT, vsize, + vma->vm_page_prot); + if (ret) { + EDEB_ERR(4, "remap_pfn_range() failed ret=%x", + ret); + ret = -ENOMEM; + } + goto mmap_exit0; + } else if (rsrc_type == 2) { /* cq queue_addr */ + EDEB(6, "cq=%p cq q_addr", cq); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + ret = 0; + goto mmap_exit0; + } else { + EDEB_ERR(6, "bad resource type %x", rsrc_type); + ret = -EINVAL; + goto mmap_exit0; + } + } else if (q_type == 2) { /* QP */ + struct ehca_qp *qp = NULL; + struct ehca_pd *pd = NULL; + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, idr_handle); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (pd->ownpid != cur_pid) { + EDEB_ERR(4, "Invalid caller pid=%x ownpid=%x", + cur_pid, pd->ownpid); + return -ENOMEM; + } + + /* make sure this mmap really belongs to the authorized user */ + if (!qp || !qp->ib_qp.uobject || + qp->ib_qp.uobject->context != context) { + EDEB(6, "qp=%p, uobject=%p, context=%p", + qp, qp->ib_qp.uobject, qp->ib_qp.uobject->context); + ret = -EINVAL; + goto mmap_exit0; + } + if (rsrc_type == 1) { /* galpa fw handle */ + EDEB(6, "qp=%p qp triggerarea", qp); + vma->vm_flags |= VM_RESERVED; + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + EDEB_ERR(4, "invalid vsize=%lx", + vma->vm_end - vma->vm_start); + ret = -EINVAL; + goto mmap_exit0; + } + + physical = qp->galpas.user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_RESERVED; + + EDEB(6, "vsize=%lx physical=%lx", vsize, physical); + ret = remap_pfn_range(vma, vma->vm_start, + physical >> PAGE_SHIFT, vsize, + vma->vm_page_prot); + if (ret) { + EDEB_ERR(4, "remap_pfn_range() failed ret=%x", + ret); + ret = -ENOMEM; + } + goto mmap_exit0; + } else if (rsrc_type == 2) { /* qp rqueue_addr */ + EDEB(6, "qp=%p qp rqueue_addr", qp); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + ret = 0; + goto mmap_exit0; + } else if (rsrc_type == 3) { /* qp squeue_addr */ + EDEB(6, "qp=%p qp squeue_addr", qp); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + ret = 0; + goto mmap_exit0; + } else { + EDEB_ERR(4, "bad resource type %x", rsrc_type); + ret = -EINVAL; + goto mmap_exit0; + } + } else { + EDEB_ERR(4, "bad queue type %x", q_type); + ret = -EINVAL; + goto mmap_exit0; + } + +mmap_exit0: + EDEB_EX(7, "ret=%x", ret); + return ret; +} + +int ehca_mmap_nopage(u64 foffset, u64 length, void ** mapped, + struct vm_area_struct ** vma) +{ + EDEB_EN(7, "foffset=%lx length=%lx", foffset, length); + down_write(¤t->mm->mmap_sem); + *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + foffset); + up_write(¤t->mm->mmap_sem); + if (!(*mapped)) { + EDEB_ERR(4, "couldn't mmap foffset=%lx length=%lx", + foffset, length); + return -EINVAL; + } + + *vma = find_vma(current->mm, (u64)*mapped); + if (!(*vma)) { + down_write(¤t->mm->mmap_sem); + do_munmap(current->mm, 0, length); + up_write(¤t->mm->mmap_sem); + EDEB_ERR(4, "couldn't find vma queue=%p", *mapped); + return -EINVAL; + } + (*vma)->vm_flags |= VM_RESERVED; + (*vma)->vm_ops = &ehcau_vm_ops; + + EDEB_EX(7, "mapped=%p", *mapped); + return 0; +} + +int ehca_mmap_register(u64 physical, void ** mapped, + struct vm_area_struct ** vma) +{ + int ret = 0; + unsigned long vsize; + /* ehca hw supports only 4k page */ + ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); + if (ret) { + EDEB(4, "could'nt mmap physical=%lx", physical); + return ret; + } + + (*vma)->vm_flags |= VM_RESERVED; + vsize = (*vma)->vm_end - (*vma)->vm_start; + if (vsize != EHCA_PAGESIZE) { + EDEB_ERR(4, "invalid vsize=%lx", + (*vma)->vm_end - (*vma)->vm_start); + ret = -EINVAL; + return ret; + } + + (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); + (*vma)->vm_flags |= VM_IO | VM_RESERVED; + + EDEB(6, "vsize=%lx physical=%lx", vsize, physical); + ret = remap_pfn_range((*vma), (*vma)->vm_start, + physical >> PAGE_SHIFT, vsize, + (*vma)->vm_page_prot); + if (ret) { + EDEB_ERR(4, "remap_pfn_range() failed ret=%x", ret); + ret = -ENOMEM; + } + return ret; + +} + +int ehca_munmap(unsigned long addr, size_t len) { + int ret = 0; + struct mm_struct *mm = current->mm; + if (mm) { + down_write(&mm->mmap_sem); + ret = do_munmap(mm, addr, len); + up_write(&mm->mmap_sem); + } + return ret; +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c new file mode 100644 index 0000000..7181655 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -0,0 +1,1474 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "hcpi" + +#include +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hcp_phyp.h" +#include "hipz_fns.h" + +#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9,11) +#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12,12) +#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13,15) +#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18,18) +#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19,21) +#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22,23) +#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31,31) +#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56,63) + +#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0,15) +#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) +#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32,39) +#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40,47) + +#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) +#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48,63) +#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8,15) +#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24,31) + +#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0,31) +#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32,63) + +/* direct access qp controls */ +#define DAQP_CTRL_ENABLE 0x01 +#define DAQP_CTRL_SEND_COMP 0x20 +#define DAQP_CTRL_RECV_COMP 0x40 + +static u32 get_longbusy_msecs(int longbusy_rc) +{ + switch (longbusy_rc) { + case H_LONG_BUSY_ORDER_1_MSEC: + return 1; + case H_LONG_BUSY_ORDER_10_MSEC: + return 10; + case H_LONG_BUSY_ORDER_100_MSEC: + return 100; + case H_LONG_BUSY_ORDER_1_SEC: + return 1000; + case H_LONG_BUSY_ORDER_10_SEC: + return 10000; + case H_LONG_BUSY_ORDER_100_SEC: + return 100000; + default: + return 1; + } +} + +static long ehca_hcall_7arg_7ret(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long *out1, + unsigned long *out2, + unsigned long *out3, + unsigned long *out4, + unsigned long *out5, + unsigned long *out6, + unsigned long *out7) +{ + long ret = H_SUCCESS; + int i, sleep_msecs; + + EDEB_EN(7, "opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx arg5=%lx" + " arg6=%lx arg7=%lx", opcode, arg1, arg2, arg3, arg4, arg5, + arg6, arg7); + + for (i = 0; i < 5; i++) { + ret = plpar_hcall_7arg_7ret(opcode, + arg1, arg2, arg3, arg4, + arg5, arg6, arg7, + out1, out2, out3, out4, + out5, out6,out7); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + EDEB_ERR(4, "opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx", + opcode, ret, + arg1, arg2, arg3, arg4, + arg5, arg6, arg7, + *out1, *out2, *out3, *out4, + *out5, *out6, *out7); + + EDEB_EX(7, "opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx " + "out4=%lx out5=%lx out6=%lx out7=%lx", + opcode, ret, *out1, *out2, *out3, *out4, *out5, + *out6, *out7); + return ret; + } + + EDEB_EX(7, "opcode=%lx ret=H_BUSY", opcode); + + return H_BUSY; +} + +static long ehca_hcall_9arg_9ret(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9, + unsigned long *out1, + unsigned long *out2, + unsigned long *out3, + unsigned long *out4, + unsigned long *out5, + unsigned long *out6, + unsigned long *out7, + unsigned long *out8, + unsigned long *out9) +{ + long ret = H_SUCCESS; + int i, sleep_msecs; + + EDEB_EN(7, "opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " + "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, + arg8, arg9); + + + for (i = 0; i < 5; i++) { + ret = plpar_hcall_9arg_9ret(opcode, + arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8, + arg9, + out1, out2, out3, out4, + out5, out6, out7, out8, + out9); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + EDEB_ERR(4, "opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" + " arg9=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx out8=%lx" + " out9=%lx", + opcode, ret, + arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8, + arg9, + *out1, *out2, *out3, *out4, + *out5, *out6, *out7, *out8, + *out9); + + EDEB_EX(7, "opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx " + "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx out9=%lx", + opcode, ret,*out1, *out2, *out3, *out4, *out5, *out6, + *out7, *out8, *out9); + return ret; + + } + + EDEB_EX(7, "opcode=%lx ret=H_BUSY", opcode); + return H_BUSY; +} + +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 * act_nr_of_entries, + u32 * act_pages, + u32 * eq_ist) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 act_nr_of_entries_out = 0; + u64 act_pages_out = 0; + u64 eq_ist_out = 0; + u64 allocate_controls = 0; + u32 x = (u64)(&x); + + EDEB_EN(7, "pfeq=%p adapter_handle=%lx new_control=%x" + " number_of_entries=%x", + pfeq, adapter_handle.handle, neq_control, + number_of_entries); + + /* resource type */ + allocate_controls = 3ULL; + + /* ISN is associated */ + if (neq_control != 1) + allocate_controls = (1ULL << (63 - 7)) | allocate_controls; + else /* notification event queue */ + allocate_controls = (1ULL << 63) | allocate_controls; + + ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + number_of_entries, /* r6 */ + 0, 0, 0, 0, + &eq_handle->handle, /* r4 */ + &dummy, /* r5 */ + &dummy, /* r6 */ + &act_nr_of_entries_out, /* r7 */ + &act_pages_out, /* r8 */ + &eq_ist_out, /* r8 */ + &dummy); + + *act_nr_of_entries = (u32)act_nr_of_entries_out; + *act_pages = (u32)act_pages_out; + *eq_ist = (u32)eq_ist_out; + + if (ret == H_NOT_ENOUGH_RESOURCES) + EDEB_ERR(4, "Not enough resource - ret=%lx ", ret); + + EDEB_EX(7, "act_nr_of_entries=%x act_pages=%x eq_ist=%x", + *act_nr_of_entries, *act_pages, *eq_ist); + + return ret; +} + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "eq_handle=%lx, adapter_handle=%lx event_mask=%lx", + eq_handle.handle, adapter_handle.handle, event_mask); + + ret = ehca_hcall_7arg_7ret(H_RESET_EVENTS, + adapter_handle.handle, /* r4 */ + eq_handle.handle, /* r5 */ + event_mask, /* r6 */ + 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 act_nr_of_entries_out; + u64 act_pages_out; + u64 g_la_privileged_out; + u64 g_la_user_out; + + EDEB_EN(7, "Adapter_handle=%lx eq_handle=%lx cq_token=%x" + " cq_number_of_entries=%x", + adapter_handle.handle, param->eq_handle.handle, + cq->token, param->nr_cqe); + + ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE, + adapter_handle.handle, /* r4 */ + 2, /* r5 */ + param->eq_handle.handle, /* r6 */ + cq->token, /* r7 */ + param->nr_cqe, /* r8 */ + 0, 0, + &cq->ipz_cq_handle.handle, /* r4 */ + &dummy, /* r5 */ + &dummy, /* r6 */ + &act_nr_of_entries_out, /* r7 */ + &act_pages_out, /* r8 */ + &g_la_privileged_out, /* r9 */ + &g_la_user_out); /* r10 */ + + param->act_nr_of_entries = (u32)act_nr_of_entries_out; + param->act_pages = (u32)act_pages_out; + + if (ret == H_SUCCESS) + hcp_galpas_ctor(&cq->galpas, g_la_privileged_out, g_la_user_out); + + if (ret == H_NOT_ENOUGH_RESOURCES) + EDEB_ERR(4, "Not enough resources. ret=%lx", ret); + + EDEB_EX(7, "cq_handle=%lx act_nr_of_entries=%x act_pages=%x", + cq->ipz_cq_handle.handle, param->act_nr_of_entries, param->act_pages); + + return ret; +} + +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp, + struct ehca_alloc_qp_parms *parms) +{ + u64 ret = H_SUCCESS; + u64 allocate_controls; + u64 max_r10_reg; + u64 dummy = 0; + u64 qp_nr_out = 0; + u64 r6_out = 0; + u64 r7_out = 0; + u64 r8_out = 0; + u64 g_la_user_out = 0; + u64 r11_out = 0; + u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1; + u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1; + int daqp_ctrl = parms->daqp_ctrl; + + EDEB_EN(7, "Adapter_handle=%lx servicetype=%x signalingtype=%x" + " ud_av_l_key=%x send_cq_handle=%lx receive_cq_handle=%lx" + " async_eq_handle=%lx qp_token=%x pd=%x max_nr_send_wqes=%x" + " max_nr_receive_wqes=%x max_nr_send_sges=%x" + " max_nr_receive_sges=%x ud_av_l_key=%x galpa.pid=%x", + adapter_handle.handle, parms->servicetype, parms->sigtype, + parms->ud_av_l_key_ctl, qp->send_cq->ipz_cq_handle.handle, + qp->recv_cq->ipz_cq_handle.handle, parms->ipz_eq_handle.handle, + qp->token, parms->pd.value, max_nr_send_wqes, + max_nr_receive_wqes, parms->max_send_sge, parms->max_recv_sge, + parms->ud_av_l_key_ctl, qp->galpas.pid); + + allocate_controls = + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, + (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) + | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, + (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, + (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, + parms->ud_av_l_key_ctl) + | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); + + max_r10_reg = + EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, + max_nr_send_wqes) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, + max_nr_receive_wqes) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, + parms->max_send_sge) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, + parms->max_recv_sge); + + + ret = ehca_hcall_9arg_9ret(H_ALLOC_RESOURCE, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + qp->send_cq->ipz_cq_handle.handle, + qp->recv_cq->ipz_cq_handle.handle, + parms->ipz_eq_handle.handle, + ((u64)qp->token << 32) | parms->pd.value, + max_r10_reg, /* r10 */ + parms->ud_av_l_key_ctl, /* r11 */ + 0, + &qp->ipz_qp_handle.handle, + &qp_nr_out, /* r5 */ + &r6_out, /* r6 */ + &r7_out, /* r7 */ + &r8_out, /* r8 */ + &dummy, /* r9 */ + &g_la_user_out, /* r10 */ + &r11_out, + &dummy); + + /* extract outputs */ + qp->real_qp_num = (u32)qp_nr_out; + + parms->act_nr_send_sges = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, r6_out); + parms->act_nr_recv_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, r6_out); + parms->act_nr_send_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, r7_out); + parms->act_nr_recv_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, r7_out); + parms->nr_sq_pages = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, r8_out); + parms->nr_rq_pages = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, r8_out); + + if (ret == H_SUCCESS) + hcp_galpas_ctor(&qp->galpas, g_la_user_out, g_la_user_out); + + if (ret == H_NOT_ENOUGH_RESOURCES) + EDEB_ERR(4, "Not enough resources. ret=%lx",ret); + + EDEB_EX(7, "qp_nr=%x act_nr_send_wqes=%x" + " act_nr_receive_wqes=%x act_nr_send_sges=%x" + " act_nr_receive_sges=%x nr_sq_pages=%x" + " nr_rq_pages=%x galpa.user=%lx galpa.kernel=%lx", + qp->real_qp_num, parms->act_nr_send_wqes, + parms->act_nr_recv_wqes, parms->act_nr_send_sges, + parms->act_nr_recv_sges, parms->nr_sq_pages, + parms->nr_rq_pages, qp->galpas.user.fw_handle, + qp->galpas.kernel.fw_handle); + + return ret; +} + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 r_cb; + + EDEB_EN(7, "adapter_handle=%lx port_id %x", + adapter_handle.handle, port_id); + + if (((u64)query_port_response_block) & 0xfff) { + EDEB_ERR(4, "response block not page aligned"); + return H_PARAMETER; + } + + r_cb = virt_to_abs(query_port_response_block); + + ret = ehca_hcall_7arg_7ret(H_QUERY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB_DMP(7, query_port_response_block, 64, "query_port_response_block"); + EDEB(7, "offset31=%x offset35=%x offset36=%x", + ((u32*)query_port_response_block)[32], + ((u32*)query_port_response_block)[36], + ((u32*)query_port_response_block)[37]); + EDEB(7, "offset200=%x offset201=%x offset202=%x " + "offset203=%x", + ((u32*)query_port_response_block)[0x200], + ((u32*)query_port_response_block)[0x201], + ((u32*)query_port_response_block)[0x202], + ((u32*)query_port_response_block)[0x203]); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 r_cb; + EDEB_EN(7, "adapter_handle=%lx", adapter_handle.handle); + + if (((u64)query_hca_rblock) & 0xfff) { + EDEB_ERR(4, "response_block=%p not page aligned", + query_hca_rblock); + return H_PARAMETER; + } + + r_cb = virt_to_abs(query_hca_rblock); + + ret = ehca_hcall_7arg_7ret(H_QUERY_HCA, + adapter_handle.handle, /* r4 */ + r_cb, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB(7, "offset0=%x offset1=%x offset2=%x offset3=%x", + ((u32*)query_hca_rblock)[0], + ((u32*)query_hca_rblock)[1], + ((u32*)query_hca_rblock)[2], ((u32*)query_hca_rblock)[3]); + EDEB(7, "offset4=%x offset5=%x offset6=%x offset7=%x", + ((u32*)query_hca_rblock)[4], + ((u32*)query_hca_rblock)[5], + ((u32*)query_hca_rblock)[6], ((u32*)query_hca_rblock)[7]); + EDEB(7, "offset8=%x offset9=%x offseta=%x offsetb=%x", + ((u32*)query_hca_rblock)[8], + ((u32*)query_hca_rblock)[9], + ((u32*)query_hca_rblock)[10], ((u32*)query_hca_rblock)[11]); + EDEB(7, "offsetc=%x offsetd=%x offsete=%x offsetf=%x", + ((u32*)query_hca_rblock)[12], + ((u32*)query_hca_rblock)[13], + ((u32*)query_hca_rblock)[14], ((u32*)query_hca_rblock)[15]); + EDEB(7, "offset136=%x offset192=%x offset204=%x", + ((u32*)query_hca_rblock)[32], + ((u32*)query_hca_rblock)[48], ((u32*)query_hca_rblock)[51]); + EDEB(7, "offset231=%x offset235=%x", + ((u32*)query_hca_rblock)[57], ((u32*)query_hca_rblock)[58]); + EDEB(7, "offset200=%x offset201=%x offset202=%x offset203=%x", + ((u32*)query_hca_rblock)[0x201], + ((u32*)query_hca_rblock)[0x202], + ((u32*)query_hca_rblock)[0x203], + ((u32*)query_hca_rblock)[0x204]); + + EDEB_EX(7, "ret=%lx adapter_handle=%lx", + ret, adapter_handle.handle); + + return ret; +} + +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "adapter_handle=%lx pagesize=%x queue_type=%x" + " resource_handle=%lx logical_address_of_page=%lx count=%lx", + adapter_handle.handle, pagesize, queue_type, + resource_handle, logical_address_of_page, count); + + ret = ehca_hcall_7arg_7ret(H_REGISTER_RPAGES, + adapter_handle.handle, /* r4 */ + queue_type | pagesize << 8, /* r5 */ + resource_handle, /* r6 */ + logical_address_of_page, /* r7 */ + count, /* r8 */ + 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret = H_SUCCESS; + + EDEB_EN(7, "pfeq=%p adapter_handle=%lx eq_handle=%lx pagesize=%x" + " queue_type=%x logical_address_of_page=%lx count=%lx", + pfeq, adapter_handle.handle, eq_handle.handle, pagesize, + queue_type,logical_address_of_page, count); + + if (count != 1) { + EDEB_ERR(4, "Ppage counter=%lx", count); + return H_PARAMETER; + } + ret = hipz_h_register_rpage(adapter_handle, + pagesize, + queue_type, + eq_handle.handle, + logical_address_of_page, count); + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u32 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, + u32 ist) +{ + u32 ret = H_SUCCESS; + u64 dummy = 0; + + EDEB_EN(7, "ist=%x", ist); + + ret = ehca_hcall_7arg_7ret(H_QUERY_INT_STATE, + adapter_handle.handle, /* r4 */ + ist, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + if (ret != H_SUCCESS && ret != H_BUSY) + EDEB_ERR(4, "Could not query interrupt state."); + + EDEB_EX(7, "interrupt state: %x", ret); + + return ret; +} + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal) +{ + u64 ret = H_SUCCESS; + + EDEB_EN(7, "pfcq=%p adapter_handle=%lx cq_handle=%lx pagesize=%x" + " queue_type=%x logical_address_of_page=%lx count=%lx", + pfcq, adapter_handle.handle, cq_handle.handle, pagesize, + queue_type, logical_address_of_page, count); + + if (count != 1) { + EDEB_ERR(4, "Page counter=%lx", count); + return H_PARAMETER; + } + + ret = hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + cq_handle.handle, logical_address_of_page, + count); + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa) +{ + u64 ret = H_SUCCESS; + + EDEB_EN(7, "pfqp=%p adapter_handle=%lx qp_handle=%lx pagesize=%x" + " queue_type=%x logical_address_of_page=%lx count=%lx", + pfqp, adapter_handle.handle, qp_handle.handle, pagesize, + queue_type, logical_address_of_page, count); + + if (count != 1) { + EDEB_ERR(4, "Page counter=%lx", count); + return H_PARAMETER; + } + + ret = hipz_h_register_rpage(adapter_handle,pagesize,queue_type, + qp_handle.handle,logical_address_of_page, + count); + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe2processed, + void **log_addr_next_rq_wqe2processed, + int dis_and_get_function_code) +{ + u64 ret = H_SUCCESS; + u8 function_code = 1; + u64 dummy, dummy1, dummy2; + + EDEB_EN(7, "pfqp=%p adapter_handle=%lx function=%x qp_handle=%lx", + pfqp, adapter_handle.handle, function_code, qp_handle.handle); + + if (!log_addr_next_sq_wqe2processed) + log_addr_next_sq_wqe2processed = (void**)&dummy1; + if (!log_addr_next_rq_wqe2processed) + log_addr_next_rq_wqe2processed = (void**)&dummy2; + + ret = ehca_hcall_7arg_7ret(H_DISABLE_AND_GETC, + adapter_handle.handle, /* r4 */ + dis_and_get_function_code, /* r5 */ + qp_handle.handle, /* r6 */ + 0, 0, 0, 0, + (void*)log_addr_next_sq_wqe2processed, + (void*)log_addr_next_rq_wqe2processed, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + EDEB_EX(7, "ret=%lx ladr_next_rq_wqe_out=%p" + " ladr_next_sq_wqe_out=%p", ret, + *log_addr_next_sq_wqe2processed, + *log_addr_next_rq_wqe2processed); + + return ret; +} + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal) +{ + u64 ret = H_SUCCESS; + u64 invalid_attribute_identifier = 0; + u64 rc_attrib_mask = 0; + u64 dummy; + u64 r_cb; + EDEB_EN(7, "pfqp=%p adapter_handle=%lx qp_handle=%lx" + " update_mask=%lx qp_state=%x mqpcb=%p", + pfqp, adapter_handle.handle, qp_handle.handle, + update_mask, mqpcb->qp_state, mqpcb); + + r_cb = virt_to_abs(mqpcb); + ret = ehca_hcall_7arg_7ret(H_MODIFY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + update_mask, /* r6 */ + r_cb, /* r7 */ + 0, 0, 0, + &invalid_attribute_identifier, /* r4 */ + &dummy, /* r5 */ + &dummy, /* r6 */ + &dummy, /* r7 */ + &dummy, /* r8 */ + &rc_attrib_mask, /* r9 */ + &dummy); + if (ret == H_NOT_ENOUGH_RESOURCES) + EDEB_ERR(4, "Insufficient resources ret=%lx", ret); + + EDEB_EX(7, "ret=%lx invalid_attribute_identifier=%lx" + " invalid_attribute_MASK=%lx", ret, + invalid_attribute_identifier, rc_attrib_mask); + + return ret; +} + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 r_cb; + EDEB_EN(7, "adapter_handle=%lx qp_handle=%lx", + adapter_handle.handle, qp_handle.handle); + + r_cb = virt_to_abs(qqpcb); + EDEB(7, "r_cb=%lx", r_cb); + + ret = ehca_hcall_7arg_7ret(H_QUERY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 ladr_next_sq_wqe_out; + u64 ladr_next_rq_wqe_out; + + EDEB_EN(7, "qp=%p ipz_qp_handle=%lx adapter_handle=%lx", + qp, qp->ipz_qp_handle.handle, adapter_handle.handle); + + ret = hcp_galpas_dtor(&qp->galpas); + if (ret) { + EDEB_ERR(4, "Could not destruct qp->galpas"); + return H_RESOURCE; + } + ret = ehca_hcall_7arg_7ret(H_DISABLE_AND_GETC, + adapter_handle.handle, /* r4 */ + /* function code */ + 1, /* r5 */ + qp->ipz_qp_handle.handle, /* r6 */ + 0, 0, 0, 0, + &ladr_next_sq_wqe_out, /* r4 */ + &ladr_next_rq_wqe_out, /* r5 */ + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + if (ret == H_HARDWARE) + EDEB_ERR(4, "HCA not operational. ret=%lx", ret); + + ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + qp->ipz_qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + if (ret == H_RESOURCE) + EDEB_ERR(4, "Resource still in use. ret=%lx", ret); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "port=%x ipz_qp_handle=%lx adapter_handle=%lx", + port, qp_handle.handle, adapter_handle.handle); + + ret = ehca_hcall_7arg_7ret(H_DEFINE_AQP0, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 pma_qp_nr_out; + u64 bma_qp_nr_out; + + EDEB_EN(7, "port=%x qp_handle=%lx adapter_handle=%lx", + port, qp_handle.handle, adapter_handle.handle); + + ret = ehca_hcall_7arg_7ret(H_DEFINE_AQP1, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, + &pma_qp_nr_out, /* r4 */ + &bma_qp_nr_out, /* r5 */ + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + *pma_qp_nr = (u32)pma_qp_nr_out; + *bma_qp_nr = (u32)bma_qp_nr_out; + + if (ret == H_ALIAS_EXIST) + EDEB_ERR(4, "AQP1 already exists. ret=%lx", ret); + + EDEB_EX(7, "ret=%lx pma_qp_nr=%i bma_qp_nr=%i", + ret, (int)*pma_qp_nr, (int)*bma_qp_nr); + + return ret; +} + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u8 *dgid_sp = (u8*)&subnet_prefix; + u8 *dgid_ii = (u8*)&interface_id; + + EDEB_EN(7, "qp_handle=%lx adapter_handle=%lx\nMCG_DGID =" + " %d.%d.%d.%d.%d.%d.%d.%d." + " %d.%d.%d.%d.%d.%d.%d.%d", + qp_handle.handle, adapter_handle.handle, + dgid_sp[0], dgid_sp[1], + dgid_sp[2], dgid_sp[3], + dgid_sp[4], dgid_sp[5], + dgid_sp[6], dgid_sp[7], + dgid_ii[0], dgid_ii[1], + dgid_ii[2], dgid_ii[3], + dgid_ii[4], dgid_ii[5], + dgid_ii[6], dgid_ii[7]); + + ret = ehca_hcall_7arg_7ret(H_ATTACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + if (ret == H_NOT_ENOUGH_RESOURCES) + EDEB_ERR(4, "Not enough resources. ret=%lx", ret); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u8 *dgid_sp = (u8*)&subnet_prefix; + u8 *dgid_ii = (u8*)&interface_id; + + EDEB_EN(7, "qp_handle=%lx adapter_handle=%lx\nMCG_DGID =" + " %d.%d.%d.%d.%d.%d.%d.%d." + " %d.%d.%d.%d.%d.%d.%d.%d", + qp_handle.handle, adapter_handle.handle, + dgid_sp[0], dgid_sp[1], + dgid_sp[2], dgid_sp[3], + dgid_sp[4], dgid_sp[5], + dgid_sp[6], dgid_sp[7], + dgid_ii[0], dgid_ii[1], + dgid_ii[2], dgid_ii[3], + dgid_ii[4], dgid_ii[5], + dgid_ii[6], dgid_ii[7]); + ret = ehca_hcall_7arg_7ret(H_DETACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "cq->pf=%p cq=.%p ipz_cq_handle=%lx adapter_handle=%lx", + &cq->pf, cq, cq->ipz_cq_handle.handle, adapter_handle.handle); + + ret = hcp_galpas_dtor(&cq->galpas); + if (ret) { + EDEB_ERR(4, "Could not destruct cp->galpas"); + return H_RESOURCE; + } + + ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + force_flag != 0 ? 1L : 0L, /* r6 */ + 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + if (ret == H_RESOURCE) + EDEB(4, "ret=%lx ", ret); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "eq->pf=%p eq=%p ipz_eq_handle=%lx adapter_handle=%lx", + &eq->pf, eq, eq->ipz_eq_handle.handle, + adapter_handle.handle); + + ret = hcp_galpas_dtor(&eq->galpas); + if (ret) { + EDEB_ERR(4, "Could not destruct eq->galpas"); + return H_RESOURCE; + } + + ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + eq->ipz_eq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + + if (ret == H_RESOURCE) + EDEB_ERR(4, "Resource in use. ret=%lx ", ret); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 lkey_out; + u64 rkey_out; + + EDEB_EN(7, "adapter_handle=%lx mr=%p vaddr=%lx length=%lx" + " access_ctrl=%x pd=%x", + adapter_handle.handle, mr, vaddr, length, access_ctrl, + pd.value); + + ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE, + adapter_handle.handle, /* r4 */ + 5, /* r5 */ + vaddr, /* r6 */ + length, /* r7 */ + (((u64)access_ctrl) << 32ULL), /* r8 */ + pd.value, /* r9 */ + 0, + &(outparms->handle.handle), /* r4 */ + &dummy, /* r5 */ + &lkey_out, /* r6 */ + &rkey_out, /* r7 */ + &dummy, + &dummy, + &dummy); + outparms->lkey = (u32)lkey_out; + outparms->rkey = (u32)rkey_out; + + EDEB_EX(7, "ret=%lx mr_handle=%lx lkey=%x rkey=%x", + ret, outparms->handle.handle, outparms->lkey, outparms->rkey); + + return ret; +} + +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret = H_SUCCESS; + + EDEB_EN(7, "adapter_handle=%lx mr=%p mr_handle=%lx pagesize=%x" + " queue_type=%x logical_address_of_page=%lx count=%lx", + adapter_handle.handle, mr, mr->ipz_mr_handle.handle, pagesize, + queue_type, logical_address_of_page, count); + + if ((count > 1) && (logical_address_of_page & 0xfff)) { + EDEB_ERR(4, "logical_address_of_page not on a 4k boundary " + "adapter_handle=%lx mr=%p mr_handle=%lx " + "pagesize=%x queue_type=%x logical_address_of_page=%lx" + " count=%lx", + adapter_handle.handle, mr, mr->ipz_mr_handle.handle, + pagesize, queue_type, logical_address_of_page, count); + ret = H_PARAMETER; + } else + ret = hipz_h_register_rpage(adapter_handle, pagesize, + queue_type, + mr->ipz_mr_handle.handle, + logical_address_of_page, count); + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 remote_len_out; + u64 remote_vaddr_out; + u64 acc_ctrl_pd_out; + u64 r9_out; + + EDEB_EN(7, "adapter_handle=%lx mr=%p mr_handle=%lx", + adapter_handle.handle, mr, mr->ipz_mr_handle.handle); + + ret = ehca_hcall_7arg_7ret(H_QUERY_MR, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, + &outparms->len, /* r4 */ + &outparms->vaddr, /* r5 */ + &remote_len_out, /* r6 */ + &remote_vaddr_out, /* r7 */ + &acc_ctrl_pd_out, /* r8 */ + &r9_out, + &dummy); + + outparms->acl = acc_ctrl_pd_out >> 32; + outparms->lkey = (u32)(r9_out >> 32); + outparms->rkey = (u32)(r9_out & (0xffffffff)); + + EDEB_EX(7, "ret=%lx mr_local_length=%lx mr_local_vaddr=%lx " + "mr_remote_length=%lx mr_remote_vaddr=%lx access_ctrl=%x " + "pd=%x lkey=%x rkey=%x", ret, outparms->len, + outparms->vaddr, remote_len_out, remote_vaddr_out, + outparms->acl, outparms->acl, outparms->lkey, outparms->rkey); + + return ret; +} + +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "adapter_handle=%lx mr=%p mr_handle=%lx", + adapter_handle.handle, mr, mr->ipz_mr_handle.handle); + + ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 lkey_out; + u64 rkey_out; + + EDEB_EN(7, "adapter_handle=%lx mr=%p mr_handle=%lx vaddr_in=%lx " + "length=%lx access_ctrl=%x pd=%x mr_addr_cb=%lx", + adapter_handle.handle, mr, mr->ipz_mr_handle.handle, vaddr_in, + length, access_ctrl, pd.value, mr_addr_cb); + + ret = ehca_hcall_7arg_7ret(H_REREGISTER_PMR, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + length, /* r7 */ + /* r8 */ + ((((u64)access_ctrl) << 32ULL) | pd.value), + mr_addr_cb, /* r9 */ + 0, + &dummy, /* r4 */ + &outparms->vaddr, /* r5 */ + &lkey_out, /* r6 */ + &rkey_out, /* r7 */ + &dummy, + &dummy, + &dummy); + + outparms->lkey = (u32)lkey_out; + outparms->rkey = (u32)rkey_out; + + EDEB_EX(7, "ret=%lx vaddr=%lx lkey=%x rkey=%x", + ret, outparms->vaddr, outparms->lkey, outparms->rkey); + return ret; +} + +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 lkey_out; + u64 rkey_out; + + EDEB_EN(7, "adapter_handle=%lx orig_mr=%p orig_mr_handle=%lx " + "vaddr_in=%lx access_ctrl=%x pd=%x", adapter_handle.handle, + orig_mr, orig_mr->ipz_mr_handle.handle, vaddr_in, access_ctrl, + pd.value); + + + ret = ehca_hcall_7arg_7ret(H_REGISTER_SMR, + adapter_handle.handle, /* r4 */ + orig_mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + (((u64)access_ctrl) << 32ULL), /* r7 */ + pd.value, /* r8 */ + 0, 0, + &(outparms->handle.handle), /* r4 */ + &dummy, /* r5 */ + &lkey_out, /* r6 */ + &rkey_out, /* r7 */ + &dummy, + &dummy, + &dummy); + outparms->lkey = (u32)lkey_out; + outparms->rkey = (u32)rkey_out; + + EDEB_EX(7, "ret=%lx mr_handle=%lx lkey=%x rkey=%x", + ret, outparms->handle.handle, outparms->lkey, outparms->rkey); + + return ret; +} + +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 rkey_out; + + EDEB_EN(7, "adapter_handle=%lx mw=%p pd=%x", + adapter_handle.handle, mw, pd.value); + + ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE, + adapter_handle.handle, /* r4 */ + 6, /* r5 */ + pd.value, /* r6 */ + 0, 0, 0, 0, + &(outparms->handle.handle), /* r4 */ + &dummy, /* r5 */ + &dummy, /* r6 */ + &rkey_out, /* r7 */ + &dummy, + &dummy, + &dummy); + + outparms->rkey = (u32)rkey_out; + + EDEB_EX(7, "ret=%lx mw_handle=%lx rkey=%x", + ret, outparms->handle.handle, outparms->rkey); + return ret; +} + +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 pd_out; + u64 rkey_out; + + EDEB_EN(7, "adapter_handle=%lx mw=%p mw_handle=%lx", + adapter_handle.handle, mw, mw->ipz_mw_handle.handle); + + ret = ehca_hcall_7arg_7ret(H_QUERY_MW, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, /* r4 */ + &dummy, /* r5 */ + &dummy, /* r6 */ + &rkey_out, /* r7 */ + &pd_out, /* r8 */ + &dummy, + &dummy); + outparms->rkey = (u32)rkey_out; + + EDEB_EX(7, "ret=%lx rkey=%x pd=%lx", ret, outparms->rkey, pd_out); + + return ret; +} + +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw) +{ + u64 ret = H_SUCCESS; + u64 dummy; + + EDEB_EN(7, "adapter_handle=%lx mw=%p mw_handle=%lx", + adapter_handle.handle, mw, mw->ipz_mw_handle.handle); + + ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count) +{ + u64 ret = H_SUCCESS; + u64 dummy; + u64 r_cb; + + EDEB_EN(7, "adapter_handle=%lx ressource_handle=%lx rblock=%p", + adapter_handle.handle, ressource_handle, rblock); + + if (((u64)rblock) & 0xfff) { + EDEB_ERR(4, "rblock not page aligned."); + return H_PARAMETER; + } + + r_cb = virt_to_abs(rblock); + + ret = ehca_hcall_7arg_7ret(H_ERROR_DATA, + adapter_handle.handle, + ressource_handle, + r_cb, + 0, 0, 0, 0, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy, + &dummy); + + EDEB_EX(7, "ret=%lx", ret); + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h new file mode 100644 index 0000000..39956d8 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -0,0 +1,261 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * resources, create the empty EQPT (ring). + */ +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 * act_nr_of_entries, + u32 * act_pages, + u32 * eq_ist); + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask); +/* + * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize + * resources, create the empty CQPT (ring). + */ +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param); + + +/* + * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, + * initialize resources, create empty QPPTs (2 rings). + */ +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp, + struct ehca_alloc_qp_parms *parms); + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block); + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock); + +/* + * hipz_h_register_rpage internal function in hcp_if.h for all + * hcp_H_REGISTER_RPAGE calls. + */ +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count); + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +u32 hipz_h_query_int_state(const struct ipz_adapter_handle + hcp_adapter_handle, + u32 ist); + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal); + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa); + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe_tb_processed, + void **log_addr_next_rq_wqe_tb_processed, + int dis_and_get_function_code); +enum hcall_sigt { + HCALL_SIGT_NO_CQE = 0, + HCALL_SIGT_BY_WQE = 1, + HCALL_SIGT_EVERY = 2 +}; + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal); + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal); + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp); + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port); + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr); + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag); + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq); + +/* + * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +/* hipz_h_query_mr queries MR in HW and FW */ +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_free_resource_mr frees MR resources in HW and FW */ +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr); + +/* hipz_h_reregister_pmr reregisters MR in HW and FW */ +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_smr register shared MR in HW and FW */ +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* + * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_query_mw queries MW in HW and FW */ +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_free_resource_mw frees MW resources in HW and FW */ +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw); + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count); + +#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c new file mode 100644 index 0000000..d522d50 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -0,0 +1,92 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * load store abstraction for ehca register access with tracing + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "PHYP" + +#include "ehca_classes.h" +#include "hipz_hw.h" + +int hcall_map_page(u64 physaddr, u64 *mapaddr) +{ + *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); + + EDEB(7, "ioremap physaddr=%lx mapaddr=%lx", physaddr, *mapaddr); + return 0; +} + +int hcall_unmap_page(u64 mapaddr) +{ + EDEB(7, "mapaddr=%lx", mapaddr); + iounmap((volatile void __iomem*)mapaddr); + return 0; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, + u64 paddr_kernel, u64 paddr_user) +{ + int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); + if (ret) + return ret; + + galpas->user.fw_handle = paddr_user; + + EDEB(7, "paddr_kernel=%lx paddr_user=%lx galpas->kernel=%lx" + " galpas->user=%lx", + paddr_kernel, paddr_user, galpas->kernel.fw_handle, + galpas->user.fw_handle); + + return ret; +} + +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + int ret = 0; + + if (galpas->kernel.fw_handle) + ret = hcall_unmap_page(galpas->kernel.fw_handle); + + if (ret) + return ret; + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return ret; +} diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h new file mode 100644 index 0000000..ecb1117 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -0,0 +1,96 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Waleri Fomin + * Gerd Bayer + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /*PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; + +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + u64 out; + EDEB_EN(7, "addr=%lx offset=%x ", addr, offset); + out = *(u64 *) addr; + EDEB_EX(7, "addr=%lx value=%lx", addr, out); + return out; +} + +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + EDEB(7, "addr=%lx offset=%x value=%lx", addr, + offset, value); + *(u64 *) addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +int hcall_map_page(u64 physaddr, u64 * mapaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h new file mode 100644 index 0000000..9dac93d --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_H__ +#define __HIPZ_FNS_H__ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +#include "hipz_fns_core.h" + +#define hipz_galpa_store_eq(gal, offset, value) \ + hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_eq(gal, offset) \ + hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qped(gal, offset, value) \ + hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) + +#define hipz_galpa_load_qped(gal, offset) \ + hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) + +#define hipz_galpa_store_mrmw(gal, offset, value) \ + hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) + +#define hipz_galpa_load_mrmw(gal, offset) \ + hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h new file mode 100644 index 0000000..ff8d7ed --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -0,0 +1,122 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Heiko J Schick + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal,offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + struct h_galpa gal; + + EDEB_EN(7, "qp=%p", qp); + gal = qp->galpas.kernel; + /* ringing doorbell :-) */ + hipz_galpa_store_qp(gal, qpx_sqa, EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); + EDEB_EX(7, "qp=%p QPx_SQA = %i", qp, nr_wqes); +} + +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + struct h_galpa gal; + + EDEB_EN(7, "qp=%p", qp); + gal = qp->galpas.kernel; + /* ringing doorbell :-) */ + hipz_galpa_store_qp(gal, qpx_rqa, EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); + EDEB_EX(7, "qp=%p QPx_RQA = %i", qp, nr_wqes); +} + +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + struct h_galpa gal; + + EDEB_EN(7, "cq=%p", cq); + gal = cq->galpas.kernel; + hipz_galpa_store_cq(gal, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); + EDEB_EX(7, "cq=%p CQx_FECA = %i", cq, nr_cqes); +} + +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + struct h_galpa gal; + u64 CQx_N0_reg = 0; + + EDEB_EN(7, "cq=%p event on solicited completion -- write CQx_N0", cq); + gal = cq->galpas.kernel; + hipz_galpa_store_cq(gal, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + CQx_N0_reg = hipz_galpa_load_cq(gal, cqx_n0); + EDEB_EX(7, "cq=%p loaded CQx_N0=%lx", cq, (unsigned long)CQx_N0_reg); +} + +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + struct h_galpa gal; + u64 CQx_N1_reg = 0; + + EDEB_EN(7, "cq=%p event on completion -- write CQx_N1", + cq); + gal = cq->galpas.kernel; + hipz_galpa_store_cq(gal, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + CQx_N1_reg = hipz_galpa_load_cq(gal, cqx_n1); + EDEB_EX(7, "cq=%p loaded CQx_N1=%lx", cq, (unsigned long)CQx_N1_reg); +} + +#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h new file mode 100644 index 0000000..4bee72f --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -0,0 +1,391 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_HW_H__ +#define __HIPZ_HW_H__ + +#include "ehca_tools.h" + +/* QP Table Entry Memory Map */ +struct hipz_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; +/* 0x20*/ + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rqa; + u64 qpx_rqc; +/* 0x40*/ + u64 qpx_st; + u64 qpx_pmstate; + u64 qpx_pmfa; + u64 qpx_pkey; +/* 0x60*/ + u64 qpx_pkeya; + u64 qpx_pkeyb; + u64 qpx_pkeyc; + u64 qpx_pkeyd; +/* 0x80*/ + u64 qpx_qkey; + u64 qpx_dqp; + u64 qpx_dlidp; + u64 qpx_portp; +/* 0xa0*/ + u64 qpx_slidp; + u64 qpx_slidpp; + u64 qpx_dlida; + u64 qpx_porta; +/* 0xc0*/ + u64 qpx_slida; + u64 qpx_slidpa; + u64 qpx_slvl; + u64 qpx_ipd; +/* 0xe0*/ + u64 qpx_mtu; + u64 qpx_lato; + u64 qpx_rlimit; + u64 qpx_rnrlimit; +/* 0x100*/ + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_nspsn; +/* 0x120*/ + u64 qpx_nspsnhwm; + u64 reserved1; + u64 qpx_sdsi; + u64 qpx_sdsbc; +/* 0x140*/ + u64 qpx_sqwsize; + u64 qpx_sqwts; + u64 qpx_lsn; + u64 qpx_nssn; +/* 0x160 */ + u64 qpx_mor; + u64 qpx_cor; + u64 qpx_sqsize; + u64 qpx_erc; +/* 0x180*/ + u64 qpx_rnrrc; + u64 qpx_ernrwt; + u64 qpx_rnrresp; + u64 qpx_lmsna; +/* 0x1a0 */ + u64 qpx_sqhpc; + u64 qpx_sqcptp; + u64 qpx_sigt; + u64 qpx_wqecnt; +/* 0x1c0*/ + + u64 qpx_rqhp; + u64 qpx_rqptp; + u64 qpx_rqsize; + u64 qpx_nrr; +/* 0x1e0*/ + u64 qpx_rdmac; + u64 qpx_nrpsn; + u64 qpx_lapsn; + u64 qpx_lcr; +/* 0x200*/ + u64 qpx_rwc; + u64 qpx_rwva; + u64 qpx_rdsi; + u64 qpx_rdsbc; +/* 0x220*/ + u64 qpx_rqwsize; + u64 qpx_crmsn; + u64 qpx_rdd; + u64 qpx_larpsn; +/* 0x240*/ + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; +/* 0x260*/ + u64 qpx_aaelog; + u64 qpx_ram; + u64 qpx_rdmaqe0; + u64 qpx_rdmaqe1; +/* 0x280*/ + u64 qpx_rdmaqe2; + u64 qpx_rdmaqe3; + u64 qpx_nrpsnhwm; +/* 0x298*/ + u64 reserved[(0x400 - 0x298) / 8]; +/* 0x400 extended data */ + u64 reserved_ext[(0x500 - 0x400) / 8]; +/* 0x500 */ + u64 reserved2[(0x1000 - 0x500) / 8]; +/* 0x1000 */ +}; + +#define QPX_SQADDER EHCA_BMASK_IBM(48,63) +#define QPX_RQADDER EHCA_BMASK_IBM(48,63) + +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) + +/* MRMWPT Entry Memory Map */ +struct hipz_mrmwmm { + /* 0x00 */ + u64 mrx_hcr; + + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + /* 0x20 */ + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + /* 0x40 */ + u64 reserved4[(0x200 - 0x40) / 8]; + /* 0x200 */ + u64 mrx_ctl[64]; + +}; + +#define MRX_HCR_LPARID_VALID EHCA_BMASK_IBM(0,0) + +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) + +struct hipz_qpedmm { + /* 0x00 */ + u64 reserved0[(0x400) / 8]; + /* 0x400 */ + u64 qpedx_phh; + u64 qpedx_ppsgp; + /* 0x410 */ + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + /* 0x420 */ + u64 qpedx_ppdgu; + u64 qpedx_aph; + /* 0x430 */ + u64 qpedx_apsgp; + u64 qpedx_apsgu; + /* 0x440 */ + u64 qpedx_apdgp; + u64 qpedx_apdgu; + /* 0x450 */ + u64 qpedx_apav; + u64 qpedx_apsav; + /* 0x460 */ + u64 qpedx_hcr; + u64 reserved1[4]; + /* 0x488 */ + u64 qpedx_rrl0; + /* 0x490 */ + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + /* 0x4a0 */ + u64 reserved2; + u64 qpedx_rrl1; + /* 0x4b0 */ + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + /* 0x4c0 */ + u64 reserved3; + u64 qpedx_rrl2; + /* 0x4d0 */ + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + /* 0x4e0 */ + u64 reserved4; + u64 qpedx_rrl3; + /* 0x4f0 */ + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct hipz_QPEDMM,x) + +/* CQ Table Entry Memory Map */ +struct hipz_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; +/* 0x20 */ + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; +/* 0x40 */ + u64 cqx_ep; + u64 cqx_eq; +/* 0x50 */ + u64 reserved1; + u64 cqx_n0; +/* 0x60 */ + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +/* 0x1000 */ +}; + +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) +#define CQX_FECADDER EHCA_BMASK_IBM(32,63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) + +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) + +/* EQ Table Entry Memory Map */ +struct hipz_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + + u64 eqx_herr; + u64 eqx_aer; +/* 0x20 */ + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + +/* 0x40 */ + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; +/* 0x60 */ + u64 eqx_it; + +}; + +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) + +/* access control defines for MR/MW */ +#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 +#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 +#define HIPZ_ACCESSCTRL_R_READ 0x00200000 +#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 +#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 + +/* query hca response block */ +struct hipz_query_hca { + u32 cur_reliable_dg; + u32 cur_qp; + u32 cur_cq; + u32 cur_eq; + u32 cur_mr; + u32 cur_mw; + u32 cur_ee_context; + u32 cur_mcast_grp; + u32 cur_qp_attached_mcast_grp; + u32 reserved1; + u32 cur_ipv6_qp; + u32 cur_eth_qp; + u32 cur_hp_mr; + u32 reserved2[3]; + u32 max_rd_domain; + u32 max_qp; + u32 max_cq; + u32 max_eq; + u32 max_mr; + u32 max_hp_mr; + u32 max_mw; + u32 max_mrwpte; + u32 max_special_mrwpte; + u32 max_rd_ee_context; + u32 max_mcast_grp; + u32 max_total_mcast_qp_attach; + u32 max_mcast_qp_attach; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 internal_clock_frequency; + u32 max_pd; + u32 max_ah; + u32 max_cqe; + u32 max_wqes_wq; + u32 max_partitions; + u32 max_rr_ee_context; + u32 max_rr_qp; + u32 max_rr_hca; + u32 max_act_wqs_ee_context; + u32 max_act_wqs_qp; + u32 max_sge; + u32 max_sge_rd; + u32 memory_page_size_supported; + u64 max_mr_size; + u32 local_ca_ack_delay; + u32 num_ports; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 node_guid; + u64 hca_cap_indicators; + u32 data_counter_register_size; + u32 max_shared_rq; + u32 max_isns_eq; + u32 max_neq; +} __attribute__ ((packed)); + +/* query port response block */ +struct hipz_query_port { + u32 state; + u32 bad_pkey_cntr; + u32 lmc; + u32 lid; + u32 subnet_timeout; + u32 qkey_viol_cntr; + u32 sm_sl; + u32 sm_lid; + u32 capability_mask; + u32 init_type_reply; + u32 pkey_tbl_len; + u32 gid_tbl_len; + u64 gid_prefix; + u32 port_nr; + u16 pkey_entries[16]; + u8 reserved1[32]; + u32 trent_size; + u32 trbuf_size; + u64 max_msg_sz; + u32 max_mtu; + u32 vl_cap; + u8 reserved2[1900]; + u64 guid_entries[255]; +} __attribute__ ((packed)); + +#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c new file mode 100644 index 0000000..a14f957 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -0,0 +1,166 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define DEB_PREFIX "iptz" + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" + +extern int ehca_hwlevel; + +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % EHCA_PAGESIZE) { + EDEB(4, "ERROR!! not at PAGE-Boundary"); + return NULL; + } + EDEB(7, "queue=%p ret=%p", queue, ret); + return ret; +} + +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + EDEB(7, "queue=%p ret=%p new current_q_offset=%lx qe_size=%x", + queue, ret, queue->current_q_offset, queue->qe_size); + + return ret; +} + +int ipz_queue_ctor(struct ipz_queue *queue, + const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) +{ + int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; + int f; + + EDEB_EN(7, "nr_of_pages=%x pagesize=%x qe_size=%x pages_per_kpage=%x", + nr_of_pages, pagesize, qe_size, pages_per_kpage); + if (pagesize > PAGE_SIZE) { + EDEB_ERR(4, "FATAL ERROR: pagesize=%x is greater than " + "kernel page size", pagesize); + return 0; + } + if (!pages_per_kpage) { + EDEB_ERR(4, "FATAL ERROR: invalid kernel page size. " + "pages_per_kpage=%x", pages_per_kpage); + return 0; + } + queue->queue_length = nr_of_pages * pagesize; + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + EDEB(4, "ERROR!! didn't get the memory"); + return 0; + } + memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); + /* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ + f = 0; + while (f < nr_of_pages) { + u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); + int k; + if (!kpage) + goto ipz_queue_ctor_exit0; /*NOMEM*/ + for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) { + (queue->queue_pages)[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + + queue->current_q_offset = 0; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->pagesize = pagesize; + queue->toggle_state = 1; + EDEB_EX(7, "queue_length=%x queue_pages=%p qe_size=%x" + " act_nr_of_sg=%x", queue->queue_length, queue->queue_pages, + queue->qe_size, queue->act_nr_of_sg); + return 1; + + ipz_queue_ctor_exit0: + EDEB_ERR(4, "Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x", + queue, f, nr_of_pages); + for (f = 0; f < nr_of_pages; f += pages_per_kpage) { + if (!(queue->queue_pages)[f]) + break; + free_page((unsigned long)(queue->queue_pages)[f]); + } + return 0; +} + +int ipz_queue_dtor(struct ipz_queue *queue) +{ + int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; + int g; + int nr_pages; + + EDEB_EN(7, "ipz_queue pointer=%p", queue); + if (!queue || !queue->queue_pages) { + EDEB_ERR(4, "queue or queue_pages is NULL"); + return 0; + } + EDEB(7, "destructing a queue with the following " + "properties:\n nr_of_pages=%x pagesize=%x qe_size=%x", + queue->act_nr_of_sg, queue->pagesize, queue->qe_size); + nr_pages = queue->queue_length / queue->pagesize; + for (g = 0; g < nr_pages; g += pages_per_kpage) + free_page((unsigned long)(queue->queue_pages)[g]); + vfree(queue->queue_pages); + + EDEB_EX(7, "queue freed!"); + return 1; +} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h new file mode 100644 index 0000000..b4e87a2 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -0,0 +1,252 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#include "ehca_qes.h" +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 dummy3; /* 64 bit alignment */ +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page = NULL; + if (q_offset >= queue->queue_length) + return NULL; + current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + EDEB(7, "queue=%p ret=%p new current_q_addr=%lx qe_size=%x", + queue, ret, queue->current_q_offset, queue->qe_size); + + return ret; +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + u32 cqe_flags = cqe->cqe_flags; + + if ((cqe_flags >> 7) != (queue->toggle_state & 1)) + return NULL; + + ipz_qeit_get_inc(queue); + return cqe; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 allocated_qpts_entries; + u32 nr_of_PTEs; /* number of page table entries PTE iterators */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size, + const u32 nr_of_sg); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_QEs, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *) ret; + EDEB(7, "ipz_QEit_EQ_get_inc_valid qe=%x", qe); + if ((qe >> 7) == (queue->toggle_state & 1)) + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + else + ret = NULL; + return ret; +} + +/* returns address (GX) of first queue entry */ +static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) +{ + return be64_to_cpu(qpt->qpts[0]); +} + +/* returns address (kv) of first page of queue page table */ +static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) +{ + return qpt->qpts; +} + +#endif /* __IPZ_PT_FN_H__ */ diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig new file mode 100644 index 0000000..fead87d --- /dev/null +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -0,0 +1,11 @@ +config INFINIBAND_ISER + tristate "ISCSI RDMA Protocol" + depends on INFINIBAND && SCSI + select SCSI_ISCSI_ATTRS + ---help--- + Support for the ISCSI RDMA Protocol over InfiniBand. This + allows you to access storage devices that speak ISER/ISCSI + over InfiniBand. + + The ISER protocol is defined by IETF. + See . diff --git a/drivers/infiniband/ulp/iser/Makefile b/drivers/infiniband/ulp/iser/Makefile new file mode 100644 index 0000000..fe6cd15 --- /dev/null +++ b/drivers/infiniband/ulp/iser/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_INFINIBAND_ISER) += ib_iser.o + +ib_iser-y := iser_verbs.o iser_initiator.o iser_memory.o \ + iscsi_iser.o diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c new file mode 100644 index 0000000..4c3f2de --- /dev/null +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -0,0 +1,790 @@ +/* + * iSCSI Initiator over iSER Data-Path + * + * Copyright (C) 2004 Dmitry Yusupov + * Copyright (C) 2004 Alex Aizman + * Copyright (C) 2005 Mike Christie + * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved. + * maintained by openib-general@openib.org + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Credits: + * Christoph Hellwig + * FUJITA Tomonori + * Arne Redlich + * Zhenyu Wang + * Modified by: + * Erez Zilber + * + * + * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "iscsi_iser.h" + +static unsigned int iscsi_max_lun = 512; +module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); + +int iser_debug_level = 0; + +MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover " + "v" DRV_VER " (" DRV_DATE ")"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); + +module_param_named(debug_level, iser_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); + +struct iser_global ig; + +void +iscsi_iser_recv(struct iscsi_conn *conn, + struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) +{ + int rc = 0; + uint32_t ret_itt; + int datalen; + int ahslen; + + /* verify PDU length */ + datalen = ntoh24(hdr->dlength); + if (datalen != rx_data_len) { + printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n", + datalen, rx_data_len); + rc = ISCSI_ERR_DATALEN; + goto error; + } + + /* read AHS */ + ahslen = hdr->hlength * 4; + + /* verify itt (itt encoding: age+cid+itt) */ + rc = iscsi_verify_itt(conn, hdr, &ret_itt); + + if (!rc) + rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); + + if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) + goto error; + + return; +error: + iscsi_conn_failure(conn, rc); +} + + +/** + * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands + * + **/ +static void +iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) +{ + struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct scsi_cmnd *sc = ctask->sc; + + iser_ctask->command_sent = 0; + iser_ctask->iser_conn = iser_conn; + + if (sc->sc_data_direction == DMA_TO_DEVICE) { + BUG_ON(ctask->total_length == 0); + /* bytes to be sent via RDMA operations */ + iser_ctask->rdma_data_count = ctask->total_length - + ctask->imm_count - + ctask->unsol_count; + + debug_scsi("cmd [itt %x total %d imm %d unsol_data %d " + "rdma_data %d]\n", + ctask->itt, ctask->total_length, ctask->imm_count, + ctask->unsol_count, iser_ctask->rdma_data_count); + } else + /* bytes to be sent via RDMA operations */ + iser_ctask->rdma_data_count = ctask->total_length; + + iser_ctask_rdma_init(iser_ctask); +} + +/** + * iscsi_mtask_xmit - xmit management(immediate) task + * @conn: iscsi connection + * @mtask: task management task + * + * Notes: + * The function can return -EAGAIN in which case caller must + * call it again later, or recover. '0' return code means successful + * xmit. + * + **/ +static int +iscsi_iser_mtask_xmit(struct iscsi_conn *conn, + struct iscsi_mgmt_task *mtask) +{ + int error = 0; + + debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt); + + error = iser_send_control(conn, mtask); + + /* since iser xmits control with zero copy, mtasks can not be recycled + * right after sending them. + * The recycling scheme is based on whether a response is expected + * - if yes, the mtask is recycled at iscsi_complete_pdu + * - if no, the mtask is recycled at iser_snd_completion + */ + if (error && error != -EAGAIN) + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + + return error; +} + +static int +iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask) +{ + struct iscsi_data hdr; + int error = 0; + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + + /* Send data-out PDUs while there's still unsolicited data to send */ + while (ctask->unsol_count > 0) { + iscsi_prep_unsolicit_data_pdu(ctask, &hdr, + iser_ctask->rdma_data_count); + + debug_scsi("Sending data-out: itt 0x%x, data count %d\n", + hdr.itt, ctask->data_count); + + /* the buffer description has been passed with the command */ + /* Send the command */ + error = iser_send_data_out(conn, ctask, &hdr); + if (error) { + ctask->unsol_datasn--; + goto iscsi_iser_ctask_xmit_unsol_data_exit; + } + ctask->unsol_count -= ctask->data_count; + debug_scsi("Need to send %d more as data-out PDUs\n", + ctask->unsol_count); + } + +iscsi_iser_ctask_xmit_unsol_data_exit: + return error; +} + +static int +iscsi_iser_ctask_xmit(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask) +{ + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + int error = 0; + + debug_scsi("ctask deq [cid %d itt 0x%x]\n", + conn->id, ctask->itt); + + /* + * serialize with TMF AbortTask + */ + if (ctask->mtask) + return error; + + /* Send the cmd PDU */ + if (!iser_ctask->command_sent) { + error = iser_send_command(conn, ctask); + if (error) + goto iscsi_iser_ctask_xmit_exit; + iser_ctask->command_sent = 1; + } + + /* Send unsolicited data-out PDU(s) if necessary */ + if (ctask->unsol_count) + error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); + + iscsi_iser_ctask_xmit_exit: + if (error && error != -EAGAIN) + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + return error; +} + +static void +iscsi_iser_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +{ + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + + if (iser_ctask->status == ISER_TASK_STATUS_STARTED) { + iser_ctask->status = ISER_TASK_STATUS_COMPLETED; + iser_ctask_rdma_finalize(iser_ctask); + } +} + +static struct iser_conn * +iscsi_iser_ib_conn_lookup(__u64 ep_handle) +{ + struct iser_conn *ib_conn; + struct iser_conn *uib_conn = (struct iser_conn *)(unsigned long)ep_handle; + + mutex_lock(&ig.connlist_mutex); + list_for_each_entry(ib_conn, &ig.connlist, conn_list) { + if (ib_conn == uib_conn) { + mutex_unlock(&ig.connlist_mutex); + return ib_conn; + } + } + mutex_unlock(&ig.connlist_mutex); + iser_err("no conn exists for eph %llx\n",(unsigned long long)ep_handle); + return NULL; +} + +static struct iscsi_cls_conn * +iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) +{ + struct iscsi_conn *conn; + struct iscsi_cls_conn *cls_conn; + struct iscsi_iser_conn *iser_conn; + + cls_conn = iscsi_conn_setup(cls_session, conn_idx); + if (!cls_conn) + return NULL; + conn = cls_conn->dd_data; + + /* + * due to issues with the login code re iser sematics + * this not set in iscsi_conn_setup - FIXME + */ + conn->max_recv_dlength = 128; + + iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); + if (!iser_conn) + goto conn_alloc_fail; + + /* currently this is the only field which need to be initiated */ + rwlock_init(&iser_conn->lock); + + conn->dd_data = iser_conn; + iser_conn->iscsi_conn = conn; + + return cls_conn; + +conn_alloc_fail: + iscsi_conn_teardown(cls_conn); + return NULL; +} + +static void +iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_iser_conn *iser_conn = conn->dd_data; + + iscsi_conn_teardown(cls_conn); + kfree(iser_conn); +} + +static int +iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, + struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, + int is_leading) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_iser_conn *iser_conn; + struct iser_conn *ib_conn; + int error; + + error = iscsi_conn_bind(cls_session, cls_conn, is_leading); + if (error) + return error; + + /* the transport ep handle comes from user space so it must be + * verified against the global ib connections list */ + ib_conn = iscsi_iser_ib_conn_lookup(transport_eph); + if (!ib_conn) { + iser_err("can't bind eph %llx\n", + (unsigned long long)transport_eph); + return -EINVAL; + } + /* binds the iSER connection retrieved from the previously + * connected ep_handle to the iSCSI layer connection. exchanges + * connection pointers */ + iser_err("binding iscsi conn %p to iser_conn %p\n",conn,ib_conn); + iser_conn = conn->dd_data; + ib_conn->iser_conn = iser_conn; + iser_conn->ib_conn = ib_conn; + + conn->recv_lock = &iser_conn->lock; + + return 0; +} + +static int +iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + int err; + + err = iscsi_conn_start(cls_conn); + if (err) + return err; + + return iser_conn_set_full_featured_mode(conn); +} + +static void +iscsi_iser_conn_terminate(struct iscsi_conn *conn) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = iser_conn->ib_conn; + + BUG_ON(!ib_conn); + /* starts conn teardown process, waits until all previously * + * posted buffers get flushed, deallocates all conn resources */ + iser_conn_terminate(ib_conn); + iser_conn->ib_conn = NULL; + conn->recv_lock = NULL; +} + + +static struct iscsi_transport iscsi_iser_transport; + +static struct iscsi_cls_session * +iscsi_iser_session_create(struct iscsi_transport *iscsit, + struct scsi_transport_template *scsit, + uint32_t initial_cmdsn, uint32_t *hostno) +{ + struct iscsi_cls_session *cls_session; + struct iscsi_session *session; + int i; + uint32_t hn; + struct iscsi_cmd_task *ctask; + struct iscsi_mgmt_task *mtask; + struct iscsi_iser_cmd_task *iser_ctask; + struct iser_desc *desc; + + cls_session = iscsi_session_setup(iscsit, scsit, + sizeof(struct iscsi_iser_cmd_task), + sizeof(struct iser_desc), + initial_cmdsn, &hn); + if (!cls_session) + return NULL; + + *hostno = hn; + session = class_to_transport_session(cls_session); + + /* libiscsi setup itts, data and pool so just set desc fields */ + for (i = 0; i < session->cmds_max; i++) { + ctask = session->cmds[i]; + iser_ctask = ctask->dd_data; + ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; + } + + for (i = 0; i < session->mgmtpool_max; i++) { + mtask = session->mgmt_cmds[i]; + desc = mtask->dd_data; + mtask->hdr = &desc->iscsi_header; + desc->data = mtask->data; + } + + return cls_session; +} + +static int +iscsi_iser_conn_set_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, uint32_t value) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_session *session = conn->session; + + spin_lock_bh(&session->lock); + if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && + conn->stop_stage != STOP_CONN_RECOVER) { + printk(KERN_ERR "iscsi_iser: can not change parameter [%d]\n", + param); + spin_unlock_bh(&session->lock); + return 0; + } + spin_unlock_bh(&session->lock); + + switch (param) { + case ISCSI_PARAM_MAX_RECV_DLENGTH: + /* TBD */ + break; + case ISCSI_PARAM_MAX_XMIT_DLENGTH: + conn->max_xmit_dlength = value; + break; + case ISCSI_PARAM_HDRDGST_EN: + if (value) { + printk(KERN_ERR "DataDigest wasn't negotiated to None"); + return -EPROTO; + } + break; + case ISCSI_PARAM_DATADGST_EN: + if (value) { + printk(KERN_ERR "DataDigest wasn't negotiated to None"); + return -EPROTO; + } + break; + case ISCSI_PARAM_INITIAL_R2T_EN: + session->initial_r2t_en = value; + break; + case ISCSI_PARAM_IMM_DATA_EN: + session->imm_data_en = value; + break; + case ISCSI_PARAM_FIRST_BURST: + session->first_burst = value; + break; + case ISCSI_PARAM_MAX_BURST: + session->max_burst = value; + break; + case ISCSI_PARAM_PDU_INORDER_EN: + session->pdu_inorder_en = value; + break; + case ISCSI_PARAM_DATASEQ_INORDER_EN: + session->dataseq_inorder_en = value; + break; + case ISCSI_PARAM_ERL: + session->erl = value; + break; + case ISCSI_PARAM_IFMARKER_EN: + if (value) { + printk(KERN_ERR "IFMarker wasn't negotiated to No"); + return -EPROTO; + } + break; + case ISCSI_PARAM_OFMARKER_EN: + if (value) { + printk(KERN_ERR "OFMarker wasn't negotiated to No"); + return -EPROTO; + } + break; + default: + break; + } + + return 0; +} + +static int +iscsi_iser_session_get_param(struct iscsi_cls_session *cls_session, + enum iscsi_param param, uint32_t *value) +{ + struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + struct iscsi_session *session = iscsi_hostdata(shost->hostdata); + + switch (param) { + case ISCSI_PARAM_INITIAL_R2T_EN: + *value = session->initial_r2t_en; + break; + case ISCSI_PARAM_MAX_R2T: + *value = session->max_r2t; + break; + case ISCSI_PARAM_IMM_DATA_EN: + *value = session->imm_data_en; + break; + case ISCSI_PARAM_FIRST_BURST: + *value = session->first_burst; + break; + case ISCSI_PARAM_MAX_BURST: + *value = session->max_burst; + break; + case ISCSI_PARAM_PDU_INORDER_EN: + *value = session->pdu_inorder_en; + break; + case ISCSI_PARAM_DATASEQ_INORDER_EN: + *value = session->dataseq_inorder_en; + break; + case ISCSI_PARAM_ERL: + *value = session->erl; + break; + case ISCSI_PARAM_IFMARKER_EN: + *value = 0; + break; + case ISCSI_PARAM_OFMARKER_EN: + *value = 0; + break; + default: + return ISCSI_ERR_PARAM_NOT_FOUND; + } + + return 0; +} + +static int +iscsi_iser_conn_get_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, uint32_t *value) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + + switch(param) { + case ISCSI_PARAM_MAX_RECV_DLENGTH: + *value = conn->max_recv_dlength; + break; + case ISCSI_PARAM_MAX_XMIT_DLENGTH: + *value = conn->max_xmit_dlength; + break; + case ISCSI_PARAM_HDRDGST_EN: + *value = 0; + break; + case ISCSI_PARAM_DATADGST_EN: + *value = 0; + break; + /*case ISCSI_PARAM_TARGET_RECV_DLENGTH: + *value = conn->target_recv_dlength; + break; + case ISCSI_PARAM_INITIATOR_RECV_DLENGTH: + *value = conn->initiator_recv_dlength; + break;*/ + default: + return ISCSI_ERR_PARAM_NOT_FOUND; + } + + return 0; +} + + +static void +iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + + stats->txdata_octets = conn->txdata_octets; + stats->rxdata_octets = conn->rxdata_octets; + stats->scsicmd_pdus = conn->scsicmd_pdus_cnt; + stats->dataout_pdus = conn->dataout_pdus_cnt; + stats->scsirsp_pdus = conn->scsirsp_pdus_cnt; + stats->datain_pdus = conn->datain_pdus_cnt; /* always 0 */ + stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ + stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; + stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; + stats->custom_length = 3; + strcpy(stats->custom[0].desc, "qp_tx_queue_full"); + stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */ + strcpy(stats->custom[1].desc, "fmr_map_not_avail"); + stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */; + strcpy(stats->custom[2].desc, "eh_abort_cnt"); + stats->custom[2].value = conn->eh_abort_cnt; +} + +static int +iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking, + __u64 *ep_handle) +{ + int err; + struct iser_conn *ib_conn; + + err = iser_conn_init(&ib_conn); + if (err) + goto out; + + err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking); + if (!err) + *ep_handle = (__u64)(unsigned long)ib_conn; + +out: + return err; +} + +static int +iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) +{ + struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); + int rc; + + if (!ib_conn) + return -EINVAL; + + rc = wait_event_interruptible_timeout(ib_conn->wait, + ib_conn->state == ISER_CONN_UP, + msecs_to_jiffies(timeout_ms)); + + /* if conn establishment failed, return error code to iscsi */ + if (!rc && + (ib_conn->state == ISER_CONN_TERMINATING || + ib_conn->state == ISER_CONN_DOWN)) + rc = -1; + + iser_err("ib conn %p rc = %d\n", ib_conn, rc); + + if (rc > 0) + return 1; /* success, this is the equivalent of POLLOUT */ + else if (!rc) + return 0; /* timeout */ + else + return rc; /* signal */ +} + +static void +iscsi_iser_ep_disconnect(__u64 ep_handle) +{ + struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); + + if (!ib_conn) + return; + + iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); + + iser_conn_terminate(ib_conn); +} + +static struct scsi_host_template iscsi_iser_sht = { + .name = "iSCSI Initiator over iSER, v." DRV_VER, + .queuecommand = iscsi_queuecommand, + .can_queue = ISCSI_XMIT_CMDS_MAX - 1, + .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, + .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, + .eh_abort_handler = iscsi_eh_abort, + .eh_host_reset_handler = iscsi_eh_host_reset, + .use_clustering = DISABLE_CLUSTERING, + .proc_name = "iscsi_iser", + .this_id = -1, +}; + +static struct iscsi_transport iscsi_iser_transport = { + .owner = THIS_MODULE, + .name = "iser", + .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T, + .param_mask = ISCSI_MAX_RECV_DLENGTH | + ISCSI_MAX_XMIT_DLENGTH | + ISCSI_HDRDGST_EN | + ISCSI_DATADGST_EN | + ISCSI_INITIAL_R2T_EN | + ISCSI_MAX_R2T | + ISCSI_IMM_DATA_EN | + ISCSI_FIRST_BURST | + ISCSI_MAX_BURST | + ISCSI_PDU_INORDER_EN | + ISCSI_DATASEQ_INORDER_EN, + .host_template = &iscsi_iser_sht, + .conndata_size = sizeof(struct iscsi_conn), + .max_lun = ISCSI_ISER_MAX_LUN, + .max_cmd_len = ISCSI_ISER_MAX_CMD_LEN, + /* session management */ + .create_session = iscsi_iser_session_create, + .destroy_session = iscsi_session_teardown, + /* connection management */ + .create_conn = iscsi_iser_conn_create, + .bind_conn = iscsi_iser_conn_bind, + .destroy_conn = iscsi_iser_conn_destroy, + .set_param = iscsi_iser_conn_set_param, + .get_conn_param = iscsi_iser_conn_get_param, + .get_session_param = iscsi_iser_session_get_param, + .start_conn = iscsi_iser_conn_start, + .stop_conn = iscsi_conn_stop, + /* these are called as part of conn recovery */ + .suspend_conn_recv = NULL, /* FIXME is/how this relvant to iser? */ + .terminate_conn = iscsi_iser_conn_terminate, + /* IO */ + .send_pdu = iscsi_conn_send_pdu, + .get_stats = iscsi_iser_conn_get_stats, + .init_cmd_task = iscsi_iser_cmd_init, + .xmit_cmd_task = iscsi_iser_ctask_xmit, + .xmit_mgmt_task = iscsi_iser_mtask_xmit, + .cleanup_cmd_task = iscsi_iser_cleanup_ctask, + /* recovery */ + .session_recovery_timedout = iscsi_session_recovery_timedout, + + .ep_connect = iscsi_iser_ep_connect, + .ep_poll = iscsi_iser_ep_poll, + .ep_disconnect = iscsi_iser_ep_disconnect +}; + +static int __init iser_init(void) +{ + int err; + + iser_dbg("Starting iSER datamover...\n"); + + if (iscsi_max_lun < 1) { + printk(KERN_ERR "Invalid max_lun value of %u\n", iscsi_max_lun); + return -EINVAL; + } + + iscsi_iser_transport.max_lun = iscsi_max_lun; + + memset(&ig, 0, sizeof(struct iser_global)); + + ig.desc_cache = kmem_cache_create("iser_descriptors", + sizeof (struct iser_desc), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (ig.desc_cache == NULL) + return -ENOMEM; + + /* device init is called only after the first addr resolution */ + mutex_init(&ig.device_list_mutex); + INIT_LIST_HEAD(&ig.device_list); + mutex_init(&ig.connlist_mutex); + INIT_LIST_HEAD(&ig.connlist); + + if (!iscsi_register_transport(&iscsi_iser_transport)) { + iser_err("iscsi_register_transport failed\n"); + err = -EINVAL; + goto register_transport_failure; + } + + return 0; + +register_transport_failure: + kmem_cache_destroy(ig.desc_cache); + + return err; +} + +static void __exit iser_exit(void) +{ + iser_dbg("Removing iSER datamover...\n"); + iscsi_unregister_transport(&iscsi_iser_transport); + kmem_cache_destroy(ig.desc_cache); +} + +module_init(iser_init); +module_exit(iser_exit); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h new file mode 100644 index 0000000..3350ba6 --- /dev/null +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -0,0 +1,354 @@ +/* + * iSER transport for the Open iSCSI Initiator & iSER transport internals + * + * Copyright (C) 2004 Dmitry Yusupov + * Copyright (C) 2004 Alex Aizman + * Copyright (C) 2005 Mike Christie + * based on code maintained by open-iscsi@googlegroups.com + * + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ + */ +#ifndef __ISCSI_ISER_H__ +#define __ISCSI_ISER_H__ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define DRV_NAME "iser" +#define PFX DRV_NAME ": " +#define DRV_VER "0.1" +#define DRV_DATE "May 7th, 2006" + +#define iser_dbg(fmt, arg...) \ + do { \ + if (iser_debug_level > 0) \ + printk(KERN_DEBUG PFX "%s:" fmt,\ + __func__ , ## arg); \ + } while (0) + +#define iser_err(fmt, arg...) \ + do { \ + printk(KERN_ERR PFX "%s:" fmt, \ + __func__ , ## arg); \ + } while (0) + + /* support upto 512KB in one RDMA */ +#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> PAGE_SHIFT) +#define ISCSI_ISER_MAX_LUN 256 +#define ISCSI_ISER_MAX_CMD_LEN 16 + +/* QP settings */ +/* Maximal bounds on received asynchronous PDUs */ +#define ISER_MAX_RX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */ + +#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * + * SCSI_TMFUNC(2), LOGOUT(1) */ + +#define ISER_QP_MAX_RECV_DTOS (ISCSI_XMIT_CMDS_MAX + \ + ISER_MAX_RX_MISC_PDUS + \ + ISER_MAX_TX_MISC_PDUS) + +/* the max TX (send) WR supported by the iSER QP is defined by * + * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * + * to have at max for SCSI command. The tx posting & completion handling code * + * supports -EAGAIN scheme where tx is suspended till the QP has room for more * + * send WR. D=8 comes from 64K/8K */ + +#define ISER_INFLIGHT_DATAOUTS 8 + +#define ISER_QP_MAX_REQ_DTOS (ISCSI_XMIT_CMDS_MAX * \ + (1 + ISER_INFLIGHT_DATAOUTS) + \ + ISER_MAX_TX_MISC_PDUS + \ + ISER_MAX_RX_MISC_PDUS) + +#define ISER_VER 0x10 +#define ISER_WSV 0x08 +#define ISER_RSV 0x04 + +struct iser_hdr { + u8 flags; + u8 rsvd[3]; + __be32 write_stag; /* write rkey */ + __be64 write_va; + __be32 read_stag; /* read rkey */ + __be64 read_va; +} __attribute__((packed)); + + +/* Length of an object name string */ +#define ISER_OBJECT_NAME_SIZE 64 + +enum iser_ib_conn_state { + ISER_CONN_INIT, /* descriptor allocd, no conn */ + ISER_CONN_PENDING, /* in the process of being established */ + ISER_CONN_UP, /* up and running */ + ISER_CONN_TERMINATING, /* in the process of being terminated */ + ISER_CONN_DOWN, /* shut down */ + ISER_CONN_STATES_NUM +}; + +enum iser_task_status { + ISER_TASK_STATUS_INIT = 0, + ISER_TASK_STATUS_STARTED, + ISER_TASK_STATUS_COMPLETED +}; + +enum iser_data_dir { + ISER_DIR_IN = 0, /* to initiator */ + ISER_DIR_OUT, /* from initiator */ + ISER_DIRS_NUM +}; + +struct iser_data_buf { + void *buf; /* pointer to the sg list */ + unsigned int size; /* num entries of this sg */ + unsigned long data_len; /* total data len */ + unsigned int dma_nents; /* returned by dma_map_sg */ + char *copy_buf; /* allocated copy buf for SGs unaligned * + * for rdma which are copied */ + struct scatterlist sg_single; /* SG-ified clone of a non SG SC or * + * unaligned SG */ + }; + +/* fwd declarations */ +struct iser_device; +struct iscsi_iser_conn; +struct iscsi_iser_cmd_task; + +struct iser_mem_reg { + u32 lkey; + u32 rkey; + u64 va; + u64 len; + void *mem_h; +}; + +struct iser_regd_buf { + struct iser_mem_reg reg; /* memory registration info */ + void *virt_addr; + struct iser_device *device; /* device->device for dma_unmap */ + dma_addr_t dma_addr; /* if non zero, addr for dma_unmap */ + enum dma_data_direction direction; /* direction for dma_unmap */ + unsigned int data_size; + atomic_t ref_count; /* refcount, freed when dec to 0 */ +}; + +#define MAX_REGD_BUF_VECTOR_LEN 2 + +struct iser_dto { + struct iscsi_iser_cmd_task *ctask; + struct iscsi_iser_conn *conn; + int notify_enable; + + /* vector of registered buffers */ + unsigned int regd_vector_len; + struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN]; + + /* offset into the registered buffer may be specified */ + unsigned int offset[MAX_REGD_BUF_VECTOR_LEN]; + + /* a smaller size may be specified, if 0, then full size is used */ + unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN]; +}; + +enum iser_desc_type { + ISCSI_RX, + ISCSI_TX_CONTROL , + ISCSI_TX_SCSI_COMMAND, + ISCSI_TX_DATAOUT +}; + +struct iser_desc { + struct iser_hdr iser_header; + struct iscsi_hdr iscsi_header; + struct iser_regd_buf hdr_regd_buf; + void *data; /* used by RX & TX_CONTROL */ + struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */ + enum iser_desc_type type; + struct iser_dto dto; +}; + +struct iser_device { + struct ib_device *ib_device; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_mr *mr; + struct tasklet_struct cq_tasklet; + struct list_head ig_list; /* entry in ig devices list */ + int refcount; +}; + +struct iser_conn { + struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ + enum iser_ib_conn_state state; /* rdma connection state */ + spinlock_t lock; /* used for state changes */ + struct iser_device *device; /* device context */ + struct rdma_cm_id *cma_id; /* CMA ID */ + struct ib_qp *qp; /* QP */ + struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ + int disc_evt_flag; /* disconn event delivered */ + wait_queue_head_t wait; /* waitq for conn/disconn */ + atomic_t post_recv_buf_count; /* posted rx count */ + atomic_t post_send_buf_count; /* posted tx count */ + struct work_struct comperror_work; /* conn term sleepable ctx*/ + char name[ISER_OBJECT_NAME_SIZE]; + struct iser_page_vec *page_vec; /* represents SG to fmr maps* + * maps serialized as tx is*/ + struct list_head conn_list; /* entry in ig conn list */ +}; + +struct iscsi_iser_conn { + struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ + struct iser_conn *ib_conn; /* iSER IB conn */ + + rwlock_t lock; +}; + +struct iscsi_iser_cmd_task { + struct iser_desc desc; + struct iscsi_iser_conn *iser_conn; + int rdma_data_count;/* RDMA bytes */ + enum iser_task_status status; + int command_sent; /* set if command sent */ + int dir[ISER_DIRS_NUM]; /* set if dir use*/ + struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ + struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ + struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ +}; + +struct iser_page_vec { + u64 *pages; + int length; + int offset; + int data_size; +}; + +struct iser_global { + struct mutex device_list_mutex;/* */ + struct list_head device_list; /* all iSER devices */ + struct mutex connlist_mutex; + struct list_head connlist; /* all iSER IB connections */ + + kmem_cache_t *desc_cache; +}; + +extern struct iser_global ig; +extern int iser_debug_level; + +/* allocate connection resources needed for rdma functionality */ +int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); + +int iser_send_control(struct iscsi_conn *conn, + struct iscsi_mgmt_task *mtask); + +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask); + +int iser_send_data_out(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask, + struct iscsi_data *hdr); + +void iscsi_iser_recv(struct iscsi_conn *conn, + struct iscsi_hdr *hdr, + char *rx_data, + int rx_data_len); + +int iser_conn_init(struct iser_conn **ib_conn); + +void iser_conn_terminate(struct iser_conn *ib_conn); + +void iser_conn_release(struct iser_conn *ib_conn); + +void iser_rcv_completion(struct iser_desc *desc, + unsigned long dto_xfer_len); + +void iser_snd_completion(struct iser_desc *desc); + +void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); + +void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask); + +void iser_dto_buffs_release(struct iser_dto *dto); + +int iser_regd_buff_release(struct iser_regd_buf *regd_buf); + +void iser_reg_single(struct iser_device *device, + struct iser_regd_buf *regd_buf, + enum dma_data_direction direction); + +int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, + enum iser_data_dir cmd_dir); + +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, + enum iser_data_dir cmd_dir); + +int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask, + enum iser_data_dir cmd_dir); + +int iser_connect(struct iser_conn *ib_conn, + struct sockaddr_in *src_addr, + struct sockaddr_in *dst_addr, + int non_blocking); + +int iser_reg_page_vec(struct iser_conn *ib_conn, + struct iser_page_vec *page_vec, + struct iser_mem_reg *mem_reg); + +void iser_unreg_mem(struct iser_mem_reg *mem_reg); + +int iser_post_recv(struct iser_desc *rx_desc); +int iser_post_send(struct iser_desc *tx_desc); + +int iser_conn_state_comp(struct iser_conn *ib_conn, + enum iser_ib_conn_state comp); +#endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c new file mode 100644 index 0000000..ccf56f6 --- /dev/null +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -0,0 +1,738 @@ +/* + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iscsi_iser.h" + +/* Constant PDU lengths calculations */ +#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \ + sizeof (struct iscsi_hdr)) + +/* iser_dto_add_regd_buff - increments the reference count for * + * the registered buffer & adds it to the DTO object */ +static void iser_dto_add_regd_buff(struct iser_dto *dto, + struct iser_regd_buf *regd_buf, + unsigned long use_offset, + unsigned long use_size) +{ + int add_idx; + + atomic_inc(®d_buf->ref_count); + + add_idx = dto->regd_vector_len; + dto->regd[add_idx] = regd_buf; + dto->used_sz[add_idx] = use_size; + dto->offset[add_idx] = use_offset; + + dto->regd_vector_len++; +} + +static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) +{ + struct device *dma_device; + + iser_ctask->dir[iser_dir] = 1; + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); + if (data->dma_nents == 0) { + iser_err("dma_map_sg failed!!!\n"); + return -EINVAL; + } + return 0; +} + +static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +{ + struct device *dma_device; + struct iser_data_buf *data; + + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + if (iser_ctask->dir[ISER_DIR_IN]) { + data = &iser_ctask->data[ISER_DIR_IN]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); + } + + if (iser_ctask->dir[ISER_DIR_OUT]) { + data = &iser_ctask->data[ISER_DIR_OUT]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); + } +} + +/* Register user buffer memory and initialize passive rdma + * dto descriptor. Total data size is stored in + * iser_ctask->data[ISER_DIR_IN].data_len + */ +static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, + unsigned int edtl) + +{ + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iser_regd_buf *regd_buf; + int err; + struct iser_hdr *hdr = &iser_ctask->desc.iser_header; + struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_ctask, + buf_in, + ISER_DIR_IN, + DMA_FROM_DEVICE); + if (err) + return err; + + if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) { + iser_err("Total data length: %ld, less than EDTL: " + "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", + iser_ctask->data[ISER_DIR_IN].data_len, edtl, + ctask->itt, iser_ctask->iser_conn); + return -EINVAL; + } + + err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_IN); + if (err) { + iser_err("Failed to set up Data-IN RDMA\n"); + return err; + } + regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN]; + + hdr->flags |= ISER_RSV; + hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); + hdr->read_va = cpu_to_be64(regd_buf->reg.va); + + iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", + ctask->itt, regd_buf->reg.rkey, + (unsigned long long)regd_buf->reg.va); + + return 0; +} + +/* Register user buffer memory and initialize passive rdma + * dto descriptor. Total data size is stored in + * ctask->data[ISER_DIR_OUT].data_len + */ +static int +iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, + unsigned int imm_sz, + unsigned int unsol_sz, + unsigned int edtl) +{ + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iser_regd_buf *regd_buf; + int err; + struct iser_dto *send_dto = &iser_ctask->desc.dto; + struct iser_hdr *hdr = &iser_ctask->desc.iser_header; + struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_ctask, + buf_out, + ISER_DIR_OUT, + DMA_TO_DEVICE); + if (err) + return err; + + if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) { + iser_err("Total data length: %ld, less than EDTL: %d, " + "in WRITE cmd BHS itt: %d, conn: 0x%p\n", + iser_ctask->data[ISER_DIR_OUT].data_len, + edtl, ctask->itt, ctask->conn); + return -EINVAL; + } + + err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_OUT); + if (err != 0) { + iser_err("Failed to register write cmd RDMA mem\n"); + return err; + } + + regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT]; + + if (unsol_sz < edtl) { + hdr->flags |= ISER_WSV; + hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey); + hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz); + + iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " + "VA:%#llX + unsol:%d\n", + ctask->itt, regd_buf->reg.rkey, + (unsigned long long)regd_buf->reg.va, unsol_sz); + } + + if (imm_sz > 0) { + iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", + ctask->itt, imm_sz); + iser_dto_add_regd_buff(send_dto, + regd_buf, + 0, + imm_sz); + } + + return 0; +} + +/** + * iser_post_receive_control - allocates, initializes and posts receive DTO. + */ +static int iser_post_receive_control(struct iscsi_conn *conn) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_desc *rx_desc; + struct iser_regd_buf *regd_hdr; + struct iser_regd_buf *regd_data; + struct iser_dto *recv_dto = NULL; + struct iser_device *device = iser_conn->ib_conn->device; + int rx_data_size, err = 0; + + rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); + if (rx_desc == NULL) { + iser_err("Failed to alloc desc for post recv\n"); + return -ENOMEM; + } + rx_desc->type = ISCSI_RX; + + /* for the login sequence we must support rx of upto 8K; login is done + * after conn create/bind (connect) and conn stop/bind (reconnect), + * what's common for both schemes is that the connection is not started + */ + if (conn->c_stage != ISCSI_CONN_STARTED) + rx_data_size = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH; + else /* FIXME till user space sets conn->max_recv_dlength correctly */ + rx_data_size = 128; + + rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); + if (rx_desc->data == NULL) { + iser_err("Failed to alloc data buf for post recv\n"); + err = -ENOMEM; + goto post_rx_kmalloc_failure; + } + + recv_dto = &rx_desc->dto; + recv_dto->conn = iser_conn; + recv_dto->regd_vector_len = 0; + + regd_hdr = &rx_desc->hdr_regd_buf; + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); + regd_hdr->device = device; + regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + + iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); + + iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); + + regd_data = &rx_desc->data_regd_buf; + memset(regd_data, 0, sizeof(struct iser_regd_buf)); + regd_data->device = device; + regd_data->virt_addr = rx_desc->data; + regd_data->data_size = rx_data_size; + + iser_reg_single(device, regd_data, DMA_FROM_DEVICE); + + iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); + + err = iser_post_recv(rx_desc); + if (!err) + return 0; + + /* iser_post_recv failed */ + iser_dto_buffs_release(recv_dto); + kfree(rx_desc->data); +post_rx_kmalloc_failure: + kmem_cache_free(ig.desc_cache, rx_desc); + return err; +} + +/* creates a new tx descriptor and adds header regd buffer */ +static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn, + struct iser_desc *tx_desc) +{ + struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; + struct iser_dto *send_dto = &tx_desc->dto; + + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); + regd_hdr->device = iser_conn->ib_conn->device; + regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + + send_dto->conn = iser_conn; + send_dto->notify_enable = 1; + send_dto->regd_vector_len = 0; + + memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); + tx_desc->iser_header.flags = ISER_VER; + + iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); +} + +/** + * iser_conn_set_full_featured_mode - (iSER API) + */ +int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + + int i; + /* no need to keep it in a var, we are after login so if this should + * be negotiated, by now the result should be available here */ + int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; + + iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); + + /* Check that there is no posted recv or send buffers left - */ + /* they must be consumed during the login phase */ + BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); + BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); + + /* Initial post receive buffers */ + for (i = 0; i < initial_post_recv_bufs_num; i++) { + if (iser_post_receive_control(conn) != 0) { + iser_err("Failed to post recv bufs at:%d conn:0x%p\n", + i, conn); + return -ENOMEM; + } + } + iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn); + return 0; +} + +static int +iser_check_xmit(struct iscsi_conn *conn, void *task) +{ + int rc = 0; + struct iscsi_iser_conn *iser_conn = conn->dd_data; + + write_lock_bh(conn->recv_lock); + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == + ISER_QP_MAX_REQ_DTOS) { + iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + rc = -EAGAIN; + } + write_unlock_bh(conn->recv_lock); + return rc; +} + + +/** + * iser_send_command - send command PDU + */ +int iser_send_command(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iser_dto *send_dto = NULL; + unsigned long edtl; + int err = 0; + struct iser_data_buf *data_buf; + + struct iscsi_cmd *hdr = ctask->hdr; + struct scsi_cmnd *sc = ctask->sc; + + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); + return -EPERM; + } + if (iser_check_xmit(conn, ctask)) + return -EAGAIN; + + edtl = ntohl(hdr->data_length); + + /* build the tx desc regd header and add it to the tx desc dto */ + iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND; + send_dto = &iser_ctask->desc.dto; + send_dto->ctask = iser_ctask; + iser_create_send_desc(iser_conn, &iser_ctask->desc); + + if (hdr->flags & ISCSI_FLAG_CMD_READ) + data_buf = &iser_ctask->data[ISER_DIR_IN]; + else + data_buf = &iser_ctask->data[ISER_DIR_OUT]; + + if (sc->use_sg) { /* using a scatter list */ + data_buf->buf = sc->request_buffer; + data_buf->size = sc->use_sg; + } else if (sc->request_bufflen) { + /* using a single buffer - convert it into one entry SG */ + sg_init_one(&data_buf->sg_single, + sc->request_buffer, sc->request_bufflen); + data_buf->buf = &data_buf->sg_single; + data_buf->size = 1; + } + + data_buf->data_len = sc->request_bufflen; + + if (hdr->flags & ISCSI_FLAG_CMD_READ) { + err = iser_prepare_read_cmd(ctask, edtl); + if (err) + goto send_command_error; + } + if (hdr->flags & ISCSI_FLAG_CMD_WRITE) { + err = iser_prepare_write_cmd(ctask, + ctask->imm_count, + ctask->imm_count + + ctask->unsol_count, + edtl); + if (err) + goto send_command_error; + } + + iser_reg_single(iser_conn->ib_conn->device, + send_dto->regd[0], DMA_TO_DEVICE); + + if (iser_post_receive_control(conn) != 0) { + iser_err("post_recv failed!\n"); + err = -ENOMEM; + goto send_command_error; + } + + iser_ctask->status = ISER_TASK_STATUS_STARTED; + + err = iser_post_send(&iser_ctask->desc); + if (!err) + return 0; + +send_command_error: + iser_dto_buffs_release(send_dto); + iser_err("conn %p failed ctask->itt %d err %d\n",conn, ctask->itt, err); + return err; +} + +/** + * iser_send_data_out - send data out PDU + */ +int iser_send_data_out(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask, + struct iscsi_data *hdr) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + struct iser_desc *tx_desc = NULL; + struct iser_dto *send_dto = NULL; + unsigned long buf_offset; + unsigned long data_seg_len; + unsigned int itt; + int err = 0; + + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); + return -EPERM; + } + + if (iser_check_xmit(conn, ctask)) + return -EAGAIN; + + itt = ntohl(hdr->itt); + data_seg_len = ntoh24(hdr->dlength); + buf_offset = ntohl(hdr->offset); + + iser_dbg("%s itt %d dseg_len %d offset %d\n", + __func__,(int)itt,(int)data_seg_len,(int)buf_offset); + + tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); + if (tx_desc == NULL) { + iser_err("Failed to alloc desc for post dataout\n"); + return -ENOMEM; + } + + tx_desc->type = ISCSI_TX_DATAOUT; + memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); + + /* build the tx desc regd header and add it to the tx desc dto */ + send_dto = &tx_desc->dto; + send_dto->ctask = iser_ctask; + iser_create_send_desc(iser_conn, tx_desc); + + iser_reg_single(iser_conn->ib_conn->device, + send_dto->regd[0], DMA_TO_DEVICE); + + /* all data was registered for RDMA, we can use the lkey */ + iser_dto_add_regd_buff(send_dto, + &iser_ctask->rdma_regd[ISER_DIR_OUT], + buf_offset, + data_seg_len); + + if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) { + iser_err("Offset:%ld & DSL:%ld in Data-Out " + "inconsistent with total len:%ld, itt:%d\n", + buf_offset, data_seg_len, + iser_ctask->data[ISER_DIR_OUT].data_len, itt); + err = -EINVAL; + goto send_data_out_error; + } + iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n", + itt, buf_offset, data_seg_len); + + + err = iser_post_send(tx_desc); + if (!err) + return 0; + +send_data_out_error: + iser_dto_buffs_release(send_dto); + kmem_cache_free(ig.desc_cache, tx_desc); + iser_err("conn %p failed err %d\n",conn, err); + return err; +} + +int iser_send_control(struct iscsi_conn *conn, + struct iscsi_mgmt_task *mtask) +{ + struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_desc *mdesc = mtask->dd_data; + struct iser_dto *send_dto = NULL; + unsigned int itt; + unsigned long data_seg_len; + int err = 0; + unsigned char opcode; + struct iser_regd_buf *regd_buf; + struct iser_device *device; + + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); + return -EPERM; + } + + if (iser_check_xmit(conn,mtask)) + return -EAGAIN; + + /* build the tx desc regd header and add it to the tx desc dto */ + mdesc->type = ISCSI_TX_CONTROL; + send_dto = &mdesc->dto; + send_dto->ctask = NULL; + iser_create_send_desc(iser_conn, mdesc); + + device = iser_conn->ib_conn->device; + + iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); + + itt = ntohl(mtask->hdr->itt); + opcode = mtask->hdr->opcode & ISCSI_OPCODE_MASK; + data_seg_len = ntoh24(mtask->hdr->dlength); + + if (data_seg_len > 0) { + regd_buf = &mdesc->data_regd_buf; + memset(regd_buf, 0, sizeof(struct iser_regd_buf)); + regd_buf->device = device; + regd_buf->virt_addr = mtask->data; + regd_buf->data_size = mtask->data_count; + iser_reg_single(device, regd_buf, + DMA_TO_DEVICE); + iser_dto_add_regd_buff(send_dto, regd_buf, + 0, + data_seg_len); + } + + if (iser_post_receive_control(conn) != 0) { + iser_err("post_rcv_buff failed!\n"); + err = -ENOMEM; + goto send_control_error; + } + + err = iser_post_send(mdesc); + if (!err) + return 0; + +send_control_error: + iser_dto_buffs_release(send_dto); + iser_err("conn %p failed err %d\n",conn, err); + return err; +} + +/** + * iser_rcv_dto_completion - recv DTO completion + */ +void iser_rcv_completion(struct iser_desc *rx_desc, + unsigned long dto_xfer_len) +{ + struct iser_dto *dto = &rx_desc->dto; + struct iscsi_iser_conn *conn = dto->conn; + struct iscsi_session *session = conn->iscsi_conn->session; + struct iscsi_cmd_task *ctask; + struct iscsi_iser_cmd_task *iser_ctask; + struct iscsi_hdr *hdr; + char *rx_data = NULL; + int rx_data_len = 0; + unsigned int itt; + unsigned char opcode; + + hdr = &rx_desc->iscsi_header; + + iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); + + if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ + rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN; + rx_data = dto->regd[1]->virt_addr; + rx_data += dto->offset[1]; + } + + opcode = hdr->opcode & ISCSI_OPCODE_MASK; + + if (opcode == ISCSI_OP_SCSI_CMD_RSP) { + itt = hdr->itt & ISCSI_ITT_MASK; /* mask out cid and age bits */ + if (!(itt < session->cmds_max)) + iser_err("itt can't be matched to task!!!" + "conn %p opcode %d cmds_max %d itt %d\n", + conn->iscsi_conn,opcode,session->cmds_max,itt); + /* use the mapping given with the cmds array indexed by itt */ + ctask = (struct iscsi_cmd_task *)session->cmds[itt]; + iser_ctask = ctask->dd_data; + iser_dbg("itt %d ctask %p\n",itt,ctask); + iser_ctask->status = ISER_TASK_STATUS_COMPLETED; + iser_ctask_rdma_finalize(iser_ctask); + } + + iser_dto_buffs_release(dto); + + iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); + + kfree(rx_desc->data); + kmem_cache_free(ig.desc_cache, rx_desc); + + /* decrementing conn->post_recv_buf_count only --after-- freeing the * + * task eliminates the need to worry on tasks which are completed in * + * parallel to the execution of iser_conn_term. So the code that waits * + * for the posted rx bufs refcount to become zero handles everything */ + atomic_dec(&conn->ib_conn->post_recv_buf_count); +} + +void iser_snd_completion(struct iser_desc *tx_desc) +{ + struct iser_dto *dto = &tx_desc->dto; + struct iscsi_iser_conn *iser_conn = dto->conn; + struct iscsi_conn *conn = iser_conn->iscsi_conn; + struct iscsi_mgmt_task *mtask; + + iser_dbg("Initiator, Data sent dto=0x%p\n", dto); + + iser_dto_buffs_release(dto); + + if (tx_desc->type == ISCSI_TX_DATAOUT) + kmem_cache_free(ig.desc_cache, tx_desc); + + atomic_dec(&iser_conn->ib_conn->post_send_buf_count); + + write_lock(conn->recv_lock); + if (conn->suspend_tx) { + iser_dbg("%ld resuming tx\n",jiffies); + clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + scsi_queue_work(conn->session->host, &conn->xmitwork); + } + write_unlock(conn->recv_lock); + + if (tx_desc->type == ISCSI_TX_CONTROL) { + /* this arithmetic is legal by libiscsi dd_data allocation */ + mtask = (void *) ((long)(void *)tx_desc - + sizeof(struct iscsi_mgmt_task)); + if (mtask->hdr->itt == cpu_to_be32(ISCSI_RESERVED_TAG)) { + struct iscsi_session *session = conn->session; + + spin_lock(&conn->session->lock); + list_del(&mtask->running); + __kfifo_put(session->mgmtpool.queue, (void*)&mtask, + sizeof(void*)); + spin_unlock(&session->lock); + } + } +} + +void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) + +{ + iser_ctask->status = ISER_TASK_STATUS_INIT; + + iser_ctask->dir[ISER_DIR_IN] = 0; + iser_ctask->dir[ISER_DIR_OUT] = 0; + + iser_ctask->data[ISER_DIR_IN].data_len = 0; + iser_ctask->data[ISER_DIR_OUT].data_len = 0; + + memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0, + sizeof(struct iser_regd_buf)); + memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0, + sizeof(struct iser_regd_buf)); +} + +void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) +{ + int deferred; + + /* if we were reading, copy back to unaligned sglist, + * anyway dma_unmap and free the copy + */ + if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) + iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); + if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) + iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + + if (iser_ctask->dir[ISER_DIR_IN]) { + deferred = iser_regd_buff_release + (&iser_ctask->rdma_regd[ISER_DIR_IN]); + if (deferred) { + iser_err("References remain for BUF-IN rdma reg\n"); + BUG(); + } + } + + if (iser_ctask->dir[ISER_DIR_OUT]) { + deferred = iser_regd_buff_release + (&iser_ctask->rdma_regd[ISER_DIR_OUT]); + if (deferred) { + iser_err("References remain for BUF-OUT rdma reg\n"); + BUG(); + } + } + + iser_dma_unmap_task_data(iser_ctask); +} + +void iser_dto_buffs_release(struct iser_dto *dto) +{ + int i; + + for (i = 0; i < dto->regd_vector_len; i++) + iser_regd_buff_release(dto->regd[i]); +} + diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c new file mode 100644 index 0000000..31950a5 --- /dev/null +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $ + */ +#include +#include +#include +#include +#include +#include +#include + +#include "iscsi_iser.h" + +#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ +/** + * Decrements the reference count for the + * registered buffer & releases it + * + * returns 0 if released, 1 if deferred + */ +int iser_regd_buff_release(struct iser_regd_buf *regd_buf) +{ + struct device *dma_device; + + if ((atomic_read(®d_buf->ref_count) == 0) || + atomic_dec_and_test(®d_buf->ref_count)) { + /* if we used the dma mr, unreg is just NOP */ + if (regd_buf->reg.rkey != 0) + iser_unreg_mem(®d_buf->reg); + + if (regd_buf->dma_addr) { + dma_device = regd_buf->device->ib_device->dma_device; + dma_unmap_single(dma_device, + regd_buf->dma_addr, + regd_buf->data_size, + regd_buf->direction); + } + /* else this regd buf is associated with task which we */ + /* dma_unmap_single/sg later */ + return 0; + } else { + iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf); + return 1; + } +} + +/** + * iser_reg_single - fills registered buffer descriptor with + * registration information + */ +void iser_reg_single(struct iser_device *device, + struct iser_regd_buf *regd_buf, + enum dma_data_direction direction) +{ + dma_addr_t dma_addr; + + dma_addr = dma_map_single(device->ib_device->dma_device, + regd_buf->virt_addr, + regd_buf->data_size, direction); + BUG_ON(dma_mapping_error(dma_addr)); + + regd_buf->reg.lkey = device->mr->lkey; + regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */ + regd_buf->reg.len = regd_buf->data_size; + regd_buf->reg.va = dma_addr; + + regd_buf->dma_addr = dma_addr; + regd_buf->direction = direction; +} + +/** + * iser_start_rdma_unaligned_sg + */ +int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, + enum iser_data_dir cmd_dir) +{ + int dma_nents; + struct device *dma_device; + char *mem = NULL; + struct iser_data_buf *data = &iser_ctask->data[cmd_dir]; + unsigned long cmd_data_len = data->data_len; + + if (cmd_data_len > ISER_KMALLOC_THRESHOLD) + mem = (void *)__get_free_pages(GFP_NOIO, + long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); + else + mem = kmalloc(cmd_data_len, GFP_NOIO); + + if (mem == NULL) { + iser_err("Failed to allocate mem size %d %d for copying sglist\n", + data->size,(int)cmd_data_len); + return -ENOMEM; + } + + if (cmd_dir == ISER_DIR_OUT) { + /* copy the unaligned sg the buffer which is used for RDMA */ + struct scatterlist *sg = (struct scatterlist *)data->buf; + int i; + char *p, *from; + + for (p = mem, i = 0; i < data->size; i++) { + from = kmap_atomic(sg[i].page, KM_USER0); + memcpy(p, + from + sg[i].offset, + sg[i].length); + kunmap_atomic(from, KM_USER0); + p += sg[i].length; + } + } + + sg_init_one(&iser_ctask->data_copy[cmd_dir].sg_single, mem, cmd_data_len); + iser_ctask->data_copy[cmd_dir].buf = + &iser_ctask->data_copy[cmd_dir].sg_single; + iser_ctask->data_copy[cmd_dir].size = 1; + + iser_ctask->data_copy[cmd_dir].copy_buf = mem; + + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + if (cmd_dir == ISER_DIR_OUT) + dma_nents = dma_map_sg(dma_device, + &iser_ctask->data_copy[cmd_dir].sg_single, + 1, DMA_TO_DEVICE); + else + dma_nents = dma_map_sg(dma_device, + &iser_ctask->data_copy[cmd_dir].sg_single, + 1, DMA_FROM_DEVICE); + + BUG_ON(dma_nents == 0); + + iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents; + return 0; +} + +/** + * iser_finalize_rdma_unaligned_sg + */ +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, + enum iser_data_dir cmd_dir) +{ + struct device *dma_device; + struct iser_data_buf *mem_copy; + unsigned long cmd_data_len; + + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + mem_copy = &iser_ctask->data_copy[cmd_dir]; + + if (cmd_dir == ISER_DIR_OUT) + dma_unmap_sg(dma_device, &mem_copy->sg_single, 1, + DMA_TO_DEVICE); + else + dma_unmap_sg(dma_device, &mem_copy->sg_single, 1, + DMA_FROM_DEVICE); + + if (cmd_dir == ISER_DIR_IN) { + char *mem; + struct scatterlist *sg; + unsigned char *p, *to; + unsigned int sg_size; + int i; + + /* copy back read RDMA to unaligned sg */ + mem = mem_copy->copy_buf; + + sg = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; + sg_size = iser_ctask->data[ISER_DIR_IN].size; + + for (p = mem, i = 0; i < sg_size; i++){ + to = kmap_atomic(sg[i].page, KM_SOFTIRQ0); + memcpy(to + sg[i].offset, + p, + sg[i].length); + kunmap_atomic(to, KM_SOFTIRQ0); + p += sg[i].length; + } + } + + cmd_data_len = iser_ctask->data[cmd_dir].data_len; + + if (cmd_data_len > ISER_KMALLOC_THRESHOLD) + free_pages((unsigned long)mem_copy->copy_buf, + long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); + else + kfree(mem_copy->copy_buf); + + mem_copy->copy_buf = NULL; +} + +/** + * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses + * and returns the length of resulting physical address array (may be less than + * the original due to possible compaction). + * + * we build a "page vec" under the assumption that the SG meets the RDMA + * alignment requirements. Other then the first and last SG elements, all + * the "internal" elements can be compacted into a list whose elements are + * dma addresses of physical pages. The code supports also the weird case + * where --few fragments of the same page-- are present in the SG as + * consecutive elements. Also, it handles one entry SG. + */ +static int iser_sg_to_page_vec(struct iser_data_buf *data, + struct iser_page_vec *page_vec) +{ + struct scatterlist *sg = (struct scatterlist *)data->buf; + dma_addr_t first_addr, last_addr, page; + int start_aligned, end_aligned; + unsigned int cur_page = 0; + unsigned long total_sz = 0; + int i; + + /* compute the offset of first element */ + page_vec->offset = (u64) sg[0].offset; + + for (i = 0; i < data->dma_nents; i++) { + total_sz += sg_dma_len(&sg[i]); + + first_addr = sg_dma_address(&sg[i]); + last_addr = first_addr + sg_dma_len(&sg[i]); + + start_aligned = !(first_addr & ~PAGE_MASK); + end_aligned = !(last_addr & ~PAGE_MASK); + + /* continue to collect page fragments till aligned or SG ends */ + while (!end_aligned && (i + 1 < data->dma_nents)) { + i++; + total_sz += sg_dma_len(&sg[i]); + last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]); + end_aligned = !(last_addr & ~PAGE_MASK); + } + + first_addr = first_addr & PAGE_MASK; + + for (page = first_addr; page < last_addr; page += PAGE_SIZE) + page_vec->pages[cur_page++] = page; + + } + page_vec->data_size = total_sz; + iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page); + return cur_page; +} + +#define MASK_4K ((1UL << 12) - 1) /* 0xFFF */ +#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & MASK_4K) == 0) + +/** + * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned + * for RDMA sub-list of a scatter-gather list of memory buffers, and returns + * the number of entries which are aligned correctly. Supports the case where + * consecutive SG elements are actually fragments of the same physcial page. + */ +static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data) +{ + struct scatterlist *sg; + dma_addr_t end_addr, next_addr; + int i, cnt; + unsigned int ret_len = 0; + + sg = (struct scatterlist *)data->buf; + + for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) { + /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " + "offset: %ld sz: %ld\n", i, + (unsigned long)page_to_phys(sg[i].page), + (unsigned long)sg[i].offset, + (unsigned long)sg[i].length); */ + end_addr = sg_dma_address(&sg[i]) + + sg_dma_len(&sg[i]); + /* iser_dbg("Checking sg iobuf end address " + "0x%08lX\n", end_addr); */ + if (i + 1 < data->dma_nents) { + next_addr = sg_dma_address(&sg[i+1]); + /* are i, i+1 fragments of the same page? */ + if (end_addr == next_addr) + continue; + else if (!IS_4K_ALIGNED(end_addr)) { + ret_len = cnt + 1; + break; + } + } + } + if (i == data->dma_nents) + ret_len = cnt; /* loop ended */ + iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n", + ret_len, data->dma_nents, data); + return ret_len; +} + +static void iser_data_buf_dump(struct iser_data_buf *data) +{ + struct scatterlist *sg = (struct scatterlist *)data->buf; + int i; + + for (i = 0; i < data->size; i++) + iser_err("sg[%d] dma_addr:0x%lX page:0x%p " + "off:%d sz:%d dma_len:%d\n", + i, (unsigned long)sg_dma_address(&sg[i]), + sg[i].page, sg[i].offset, + sg[i].length,sg_dma_len(&sg[i])); +} + +static void iser_dump_page_vec(struct iser_page_vec *page_vec) +{ + int i; + + iser_err("page vec length %d data size %d\n", + page_vec->length, page_vec->data_size); + for (i = 0; i < page_vec->length; i++) + iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]); +} + +static void iser_page_vec_build(struct iser_data_buf *data, + struct iser_page_vec *page_vec) +{ + int page_vec_len = 0; + + page_vec->length = 0; + page_vec->offset = 0; + + iser_dbg("Translating sg sz: %d\n", data->dma_nents); + page_vec_len = iser_sg_to_page_vec(data,page_vec); + iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len); + + page_vec->length = page_vec_len; + + if (page_vec_len * PAGE_SIZE < page_vec->data_size) { + iser_err("page_vec too short to hold this SG\n"); + iser_data_buf_dump(data); + iser_dump_page_vec(page_vec); + BUG(); + } +} + +/** + * iser_reg_rdma_mem - Registers memory intended for RDMA, + * obtaining rkey and va + * + * returns 0 on success, errno code on failure + */ +int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, + enum iser_data_dir cmd_dir) +{ + struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; + struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; + struct iser_regd_buf *regd_buf; + int aligned_len; + int err; + + regd_buf = &iser_ctask->rdma_regd[cmd_dir]; + + aligned_len = iser_data_buf_aligned_len(mem); + if (aligned_len != mem->size) { + iser_err("rdma alignment violation %d/%d aligned\n", + aligned_len, mem->size); + iser_data_buf_dump(mem); + /* allocate copy buf, if we are writing, copy the */ + /* unaligned scatterlist, dma map the copy */ + if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) + return -ENOMEM; + mem = &iser_ctask->data_copy[cmd_dir]; + } + + iser_page_vec_build(mem, ib_conn->page_vec); + err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); + if (err) + return err; + + /* take a reference on this regd buf such that it will not be released * + * (eg in send dto completion) before we get the scsi response */ + atomic_inc(®d_buf->ref_count); + return 0; +} diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c new file mode 100644 index 0000000..ff117bb --- /dev/null +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -0,0 +1,827 @@ +/* + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ + */ +#include +#include +#include +#include +#include +#include + +#include "iscsi_iser.h" + +#define ISCSI_ISER_MAX_CONN 8 +#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ + ISER_QP_MAX_REQ_DTOS) * \ + ISCSI_ISER_MAX_CONN) + +static void iser_cq_tasklet_fn(unsigned long data); +static void iser_cq_callback(struct ib_cq *cq, void *cq_context); +static void iser_comp_error_worker(void *data); + +static void iser_cq_event_callback(struct ib_event *cause, void *context) +{ + iser_err("got cq event %d \n", cause->event); +} + +static void iser_qp_event_callback(struct ib_event *cause, void *context) +{ + iser_err("got qp event %d\n",cause->event); +} + +/** + * iser_create_device_ib_res - creates Protection Domain (PD), Completion + * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with + * the adapator. + * + * returns 0 on success, -1 on failure + */ +static int iser_create_device_ib_res(struct iser_device *device) +{ + device->pd = ib_alloc_pd(device->ib_device); + if (IS_ERR(device->pd)) + goto pd_err; + + device->cq = ib_create_cq(device->ib_device, + iser_cq_callback, + iser_cq_event_callback, + (void *)device, + ISER_MAX_CQ_LEN); + if (IS_ERR(device->cq)) + goto cq_err; + + if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) + goto cq_arm_err; + + tasklet_init(&device->cq_tasklet, + iser_cq_tasklet_fn, + (unsigned long)device); + + device->mr = ib_get_dma_mr(device->pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(device->mr)) + goto dma_mr_err; + + return 0; + +dma_mr_err: + tasklet_kill(&device->cq_tasklet); +cq_arm_err: + ib_destroy_cq(device->cq); +cq_err: + ib_dealloc_pd(device->pd); +pd_err: + iser_err("failed to allocate an IB resource\n"); + return -1; +} + +/** + * iser_free_device_ib_res - destory/dealloc/dereg the DMA MR, + * CQ and PD created with the device associated with the adapator. + */ +static void iser_free_device_ib_res(struct iser_device *device) +{ + BUG_ON(device->mr == NULL); + + tasklet_kill(&device->cq_tasklet); + + (void)ib_dereg_mr(device->mr); + (void)ib_destroy_cq(device->cq); + (void)ib_dealloc_pd(device->pd); + + device->mr = NULL; + device->cq = NULL; + device->pd = NULL; +} + +/** + * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP) + * + * returns 0 on success, -1 on failure + */ +static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +{ + struct iser_device *device; + struct ib_qp_init_attr init_attr; + int ret; + struct ib_fmr_pool_param params; + + BUG_ON(ib_conn->device == NULL); + + device = ib_conn->device; + + ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + + (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), + GFP_KERNEL); + if (!ib_conn->page_vec) { + ret = -ENOMEM; + goto alloc_err; + } + ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); + + params.page_shift = PAGE_SHIFT; + /* when the first/last SG element are not start/end * + * page aligned, the map whould be of N+1 pages */ + params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; + /* make the pool size twice the max number of SCSI commands * + * the ML is expected to queue, watermark for unmap at 50% */ + params.pool_size = ISCSI_XMIT_CMDS_MAX * 2; + params.dirty_watermark = ISCSI_XMIT_CMDS_MAX; + params.cache = 0; + params.flush_function = NULL; + params.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); + if (IS_ERR(ib_conn->fmr_pool)) { + ret = PTR_ERR(ib_conn->fmr_pool); + goto fmr_pool_err; + } + + memset(&init_attr, 0, sizeof init_attr); + + init_attr.event_handler = iser_qp_event_callback; + init_attr.qp_context = (void *)ib_conn; + init_attr.send_cq = device->cq; + init_attr.recv_cq = device->cq; + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; + init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; + init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; + init_attr.cap.max_recv_sge = 2; + init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + init_attr.qp_type = IB_QPT_RC; + + ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); + if (ret) + goto qp_err; + + ib_conn->qp = ib_conn->cma_id->qp; + iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n", + ib_conn, ib_conn->cma_id, + ib_conn->fmr_pool, ib_conn->cma_id->qp); + return ret; + +qp_err: + (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); +fmr_pool_err: + kfree(ib_conn->page_vec); +alloc_err: + iser_err("unable to alloc mem or create resource, err %d\n", ret); + return ret; +} + +/** + * releases the FMR pool, QP and CMA ID objects, returns 0 on success, + * -1 on failure + */ +static int iser_free_ib_conn_res(struct iser_conn *ib_conn) +{ + BUG_ON(ib_conn == NULL); + + iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", + ib_conn, ib_conn->cma_id, + ib_conn->fmr_pool, ib_conn->qp); + + /* qp is created only once both addr & route are resolved */ + if (ib_conn->fmr_pool != NULL) + ib_destroy_fmr_pool(ib_conn->fmr_pool); + + if (ib_conn->qp != NULL) + rdma_destroy_qp(ib_conn->cma_id); + + if (ib_conn->cma_id != NULL) + rdma_destroy_id(ib_conn->cma_id); + + ib_conn->fmr_pool = NULL; + ib_conn->qp = NULL; + ib_conn->cma_id = NULL; + kfree(ib_conn->page_vec); + + return 0; +} + +/** + * based on the resolved device node GUID see if there already allocated + * device for this device. If there's no such, create one. + */ +static +struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) +{ + struct list_head *p_list; + struct iser_device *device = NULL; + + mutex_lock(&ig.device_list_mutex); + + p_list = ig.device_list.next; + while (p_list != &ig.device_list) { + device = list_entry(p_list, struct iser_device, ig_list); + /* find if there's a match using the node GUID */ + if (device->ib_device->node_guid == cma_id->device->node_guid) + break; + } + + if (device == NULL) { + device = kzalloc(sizeof *device, GFP_KERNEL); + if (device == NULL) + goto out; + /* assign this device to the device */ + device->ib_device = cma_id->device; + /* init the device and link it into ig device list */ + if (iser_create_device_ib_res(device)) { + kfree(device); + device = NULL; + goto out; + } + list_add(&device->ig_list, &ig.device_list); + } +out: + BUG_ON(device == NULL); + device->refcount++; + mutex_unlock(&ig.device_list_mutex); + return device; +} + +/* if there's no demand for this device, release it */ +static void iser_device_try_release(struct iser_device *device) +{ + mutex_lock(&ig.device_list_mutex); + device->refcount--; + iser_err("device %p refcount %d\n",device,device->refcount); + if (!device->refcount) { + iser_free_device_ib_res(device); + list_del(&device->ig_list); + kfree(device); + } + mutex_unlock(&ig.device_list_mutex); +} + +int iser_conn_state_comp(struct iser_conn *ib_conn, + enum iser_ib_conn_state comp) +{ + int ret; + + spin_lock_bh(&ib_conn->lock); + ret = (ib_conn->state == comp); + spin_unlock_bh(&ib_conn->lock); + return ret; +} + +static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, + enum iser_ib_conn_state comp, + enum iser_ib_conn_state exch) +{ + int ret; + + spin_lock_bh(&ib_conn->lock); + if ((ret = (ib_conn->state == comp))) + ib_conn->state = exch; + spin_unlock_bh(&ib_conn->lock); + return ret; +} + +/** + * triggers start of the disconnect procedures and wait for them to be done + */ +void iser_conn_terminate(struct iser_conn *ib_conn) +{ + int err = 0; + + /* change the ib conn state only if the conn is UP, however always call + * rdma_disconnect since this is the only way to cause the CMA to change + * the QP state to ERROR + */ + + iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); + err = rdma_disconnect(ib_conn->cma_id); + if (err) + iser_err("Failed to disconnect, conn: 0x%p err %d\n", + ib_conn,err); + + wait_event_interruptible(ib_conn->wait, + ib_conn->state == ISER_CONN_DOWN); + + iser_conn_release(ib_conn); +} + +static void iser_connect_error(struct rdma_cm_id *cma_id) +{ + struct iser_conn *ib_conn; + ib_conn = (struct iser_conn *)cma_id->context; + + ib_conn->state = ISER_CONN_DOWN; + wake_up_interruptible(&ib_conn->wait); +} + +static void iser_addr_handler(struct rdma_cm_id *cma_id) +{ + struct iser_device *device; + struct iser_conn *ib_conn; + int ret; + + device = iser_device_find_by_ib_device(cma_id); + ib_conn = (struct iser_conn *)cma_id->context; + ib_conn->device = device; + + ret = rdma_resolve_route(cma_id, 1000); + if (ret) { + iser_err("resolve route failed: %d\n", ret); + iser_connect_error(cma_id); + } + return; +} + +static void iser_route_handler(struct rdma_cm_id *cma_id) +{ + struct rdma_conn_param conn_param; + int ret; + + ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); + if (ret) + goto failure; + + iser_dbg("path.mtu is %d setting it to %d\n", + cma_id->route.path_rec->mtu, IB_MTU_1024); + + /* we must set the MTU to 1024 as this is what the target is assuming */ + if (cma_id->route.path_rec->mtu > IB_MTU_1024) + cma_id->route.path_rec->mtu = IB_MTU_1024; + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 4; + conn_param.initiator_depth = 1; + conn_param.retry_count = 7; + conn_param.rnr_retry_count = 6; + + ret = rdma_connect(cma_id, &conn_param); + if (ret) { + iser_err("failure connecting: %d\n", ret); + goto failure; + } + + return; +failure: + iser_connect_error(cma_id); +} + +static void iser_connected_handler(struct rdma_cm_id *cma_id) +{ + struct iser_conn *ib_conn; + + ib_conn = (struct iser_conn *)cma_id->context; + ib_conn->state = ISER_CONN_UP; + wake_up_interruptible(&ib_conn->wait); +} + +static void iser_disconnected_handler(struct rdma_cm_id *cma_id) +{ + struct iser_conn *ib_conn; + + ib_conn = (struct iser_conn *)cma_id->context; + ib_conn->disc_evt_flag = 1; + + /* getting here when the state is UP means that the conn is being * + * terminated asynchronously from the iSCSI layer's perspective. */ + if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, + ISER_CONN_TERMINATING)) + iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + + /* Complete the termination process if no posts are pending */ + if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && + (atomic_read(&ib_conn->post_send_buf_count) == 0)) { + ib_conn->state = ISER_CONN_DOWN; + wake_up_interruptible(&ib_conn->wait); + } +} + +static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) +{ + int ret = 0; + + iser_err("event %d conn %p id %p\n",event->event,cma_id->context,cma_id); + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + iser_addr_handler(cma_id); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + iser_route_handler(cma_id); + break; + case RDMA_CM_EVENT_ESTABLISHED: + iser_connected_handler(cma_id); + break; + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + iser_err("event: %d, error: %d\n", event->event, event->status); + iser_connect_error(cma_id); + break; + case RDMA_CM_EVENT_DISCONNECTED: + iser_disconnected_handler(cma_id); + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + BUG(); + break; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + BUG(); + break; + case RDMA_CM_EVENT_CONNECT_REQUEST: + default: + break; + } + return ret; +} + +int iser_conn_init(struct iser_conn **ibconn) +{ + struct iser_conn *ib_conn; + + ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL); + if (!ib_conn) { + iser_err("can't alloc memory for struct iser_conn\n"); + return -ENOMEM; + } + ib_conn->state = ISER_CONN_INIT; + init_waitqueue_head(&ib_conn->wait); + atomic_set(&ib_conn->post_recv_buf_count, 0); + atomic_set(&ib_conn->post_send_buf_count, 0); + INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker, + ib_conn); + INIT_LIST_HEAD(&ib_conn->conn_list); + spin_lock_init(&ib_conn->lock); + + *ibconn = ib_conn; + return 0; +} + + /** + * starts the process of connecting to the target + * sleeps untill the connection is established or rejected + */ +int iser_connect(struct iser_conn *ib_conn, + struct sockaddr_in *src_addr, + struct sockaddr_in *dst_addr, + int non_blocking) +{ + struct sockaddr *src, *dst; + int err = 0; + + sprintf(ib_conn->name,"%d.%d.%d.%d:%d", + NIPQUAD(dst_addr->sin_addr.s_addr), dst_addr->sin_port); + + /* the device is known only --after-- address resolution */ + ib_conn->device = NULL; + + iser_err("connecting to: %d.%d.%d.%d, port 0x%x\n", + NIPQUAD(dst_addr->sin_addr), dst_addr->sin_port); + + ib_conn->state = ISER_CONN_PENDING; + + ib_conn->cma_id = rdma_create_id(iser_cma_handler, + (void *)ib_conn, + RDMA_PS_TCP); + if (IS_ERR(ib_conn->cma_id)) { + err = PTR_ERR(ib_conn->cma_id); + iser_err("rdma_create_id failed: %d\n", err); + goto id_failure; + } + + src = (struct sockaddr *)src_addr; + dst = (struct sockaddr *)dst_addr; + err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); + if (err) { + iser_err("rdma_resolve_addr failed: %d\n", err); + goto addr_failure; + } + + if (!non_blocking) { + wait_event_interruptible(ib_conn->wait, + (ib_conn->state != ISER_CONN_PENDING)); + + if (ib_conn->state != ISER_CONN_UP) { + err = -EIO; + goto connect_failure; + } + } + + mutex_lock(&ig.connlist_mutex); + list_add(&ib_conn->conn_list, &ig.connlist); + mutex_unlock(&ig.connlist_mutex); + return 0; + +id_failure: + ib_conn->cma_id = NULL; +addr_failure: + ib_conn->state = ISER_CONN_DOWN; +connect_failure: + iser_conn_release(ib_conn); + return err; +} + +/** + * Frees all conn objects and deallocs conn descriptor + */ +void iser_conn_release(struct iser_conn *ib_conn) +{ + struct iser_device *device = ib_conn->device; + + BUG_ON(ib_conn->state != ISER_CONN_DOWN); + + mutex_lock(&ig.connlist_mutex); + list_del(&ib_conn->conn_list); + mutex_unlock(&ig.connlist_mutex); + + iser_free_ib_conn_res(ib_conn); + ib_conn->device = NULL; + /* on EVENT_ADDR_ERROR there's no device yet for this conn */ + if (device != NULL) + iser_device_try_release(device); + kfree(ib_conn); +} + + +/** + * iser_reg_page_vec - Register physical memory + * + * returns: 0 on success, errno code on failure + */ +int iser_reg_page_vec(struct iser_conn *ib_conn, + struct iser_page_vec *page_vec, + struct iser_mem_reg *mem_reg) +{ + struct ib_pool_fmr *mem; + u64 io_addr; + u64 *page_list; + int status; + + page_list = page_vec->pages; + io_addr = page_list[0]; + + mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, + page_list, + page_vec->length, + &io_addr); + + if (IS_ERR(mem)) { + status = (int)PTR_ERR(mem); + iser_err("ib_fmr_pool_map_phys failed: %d\n", status); + return status; + } + + mem_reg->lkey = mem->fmr->lkey; + mem_reg->rkey = mem->fmr->rkey; + mem_reg->len = page_vec->length * PAGE_SIZE; + mem_reg->va = io_addr; + mem_reg->mem_h = (void *)mem; + + mem_reg->va += page_vec->offset; + mem_reg->len = page_vec->data_size; + + iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " + "entry[0]: (0x%08lx,%ld)] -> " + "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", + page_vec, page_vec->length, + (unsigned long)page_vec->pages[0], + (unsigned long)page_vec->data_size, + (unsigned int)mem_reg->lkey, mem_reg->mem_h, + (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); + return 0; +} + +/** + * Unregister (previosuly registered) memory. + */ +void iser_unreg_mem(struct iser_mem_reg *reg) +{ + int ret; + + iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); + + ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); + if (ret) + iser_err("ib_fmr_pool_unmap failed %d\n", ret); + + reg->mem_h = NULL; +} + +/** + * iser_dto_to_iov - builds IOV from a dto descriptor + */ +static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len) +{ + int i; + struct ib_sge *sge; + struct iser_regd_buf *regd_buf; + + if (dto->regd_vector_len > iov_len) { + iser_err("iov size %d too small for posting dto of len %d\n", + iov_len, dto->regd_vector_len); + BUG(); + } + + for (i = 0; i < dto->regd_vector_len; i++) { + sge = &iov[i]; + regd_buf = dto->regd[i]; + + sge->addr = regd_buf->reg.va; + sge->length = regd_buf->reg.len; + sge->lkey = regd_buf->reg.lkey; + + if (dto->used_sz[i] > 0) /* Adjust size */ + sge->length = dto->used_sz[i]; + + /* offset and length should not exceed the regd buf length */ + if (sge->length + dto->offset[i] > regd_buf->reg.len) { + iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:" + "%ld in dto:0x%p [%d], va:0x%08lX\n", + (unsigned long)sge->length, dto->offset[i], + (unsigned long)regd_buf->reg.len, dto, i, + (unsigned long)sge->addr); + BUG(); + } + + sge->addr += dto->offset[i]; /* Adjust offset */ + } +} + +/** + * iser_post_recv - Posts a receive buffer. + * + * returns 0 on success, -1 on failure + */ +int iser_post_recv(struct iser_desc *rx_desc) +{ + int ib_ret, ret_val = 0; + struct ib_recv_wr recv_wr, *recv_wr_failed; + struct ib_sge iov[2]; + struct iser_conn *ib_conn; + struct iser_dto *recv_dto = &rx_desc->dto; + + /* Retrieve conn */ + ib_conn = recv_dto->conn->ib_conn; + + iser_dto_to_iov(recv_dto, iov, 2); + + recv_wr.next = NULL; + recv_wr.sg_list = iov; + recv_wr.num_sge = recv_dto->regd_vector_len; + recv_wr.wr_id = (unsigned long)rx_desc; + + atomic_inc(&ib_conn->post_recv_buf_count); + ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); + if (ib_ret) { + iser_err("ib_post_recv failed ret=%d\n", ib_ret); + atomic_dec(&ib_conn->post_recv_buf_count); + ret_val = -1; + } + + return ret_val; +} + +/** + * iser_start_send - Initiate a Send DTO operation + * + * returns 0 on success, -1 on failure + */ +int iser_post_send(struct iser_desc *tx_desc) +{ + int ib_ret, ret_val = 0; + struct ib_send_wr send_wr, *send_wr_failed; + struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN]; + struct iser_conn *ib_conn; + struct iser_dto *dto = &tx_desc->dto; + + ib_conn = dto->conn->ib_conn; + + iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); + + send_wr.next = NULL; + send_wr.wr_id = (unsigned long)tx_desc; + send_wr.sg_list = iov; + send_wr.num_sge = dto->regd_vector_len; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; + + atomic_inc(&ib_conn->post_send_buf_count); + + ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); + if (ib_ret) { + iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n", + dto, dto->regd_vector_len); + iser_err("ib_post_send failed, ret:%d\n", ib_ret); + atomic_dec(&ib_conn->post_send_buf_count); + ret_val = -1; + } + + return ret_val; +} + +static void iser_comp_error_worker(void *data) +{ + struct iser_conn *ib_conn = data; + + /* getting here when the state is UP means that the conn is being * + * terminated asynchronously from the iSCSI layer's perspective. */ + if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, + ISER_CONN_TERMINATING)) + iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + + /* complete the termination process if disconnect event was delivered * + * note there are no more non completed posts to the QP */ + if (ib_conn->disc_evt_flag) { + ib_conn->state = ISER_CONN_DOWN; + wake_up_interruptible(&ib_conn->wait); + } +} + +static void iser_handle_comp_error(struct iser_desc *desc) +{ + struct iser_dto *dto = &desc->dto; + struct iser_conn *ib_conn = dto->conn->ib_conn; + + iser_dto_buffs_release(dto); + + if (desc->type == ISCSI_RX) { + kfree(desc->data); + kmem_cache_free(ig.desc_cache, desc); + atomic_dec(&ib_conn->post_recv_buf_count); + } else { /* type is TX control/command/dataout */ + if (desc->type == ISCSI_TX_DATAOUT) + kmem_cache_free(ig.desc_cache, desc); + atomic_dec(&ib_conn->post_send_buf_count); + } + + if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && + atomic_read(&ib_conn->post_send_buf_count) == 0) + schedule_work(&ib_conn->comperror_work); +} + +static void iser_cq_tasklet_fn(unsigned long data) +{ + struct iser_device *device = (struct iser_device *)data; + struct ib_cq *cq = device->cq; + struct ib_wc wc; + struct iser_desc *desc; + unsigned long xfer_len; + + while (ib_poll_cq(cq, 1, &wc) == 1) { + desc = (struct iser_desc *) (unsigned long) wc.wr_id; + BUG_ON(desc == NULL); + + if (wc.status == IB_WC_SUCCESS) { + if (desc->type == ISCSI_RX) { + xfer_len = (unsigned long)wc.byte_len; + iser_rcv_completion(desc, xfer_len); + } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ + iser_snd_completion(desc); + } else { + iser_err("comp w. error op %d status %d\n",desc->type,wc.status); + iser_handle_comp_error(desc); + } + } + /* #warning "it is assumed here that arming CQ only once its empty" * + * " would not cause interrupts to be missed" */ + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +} + +static void iser_cq_callback(struct ib_cq *cq, void *cq_context) +{ + struct iser_device *device = (struct iser_device *)cq_context; + + tasklet_schedule(&device->cq_tasklet); +} diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 9ae4361..d8e2fc9 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_SCSI_ISCSI_ATTRS) += scsi_t obj-$(CONFIG_SCSI_SAS_ATTRS) += scsi_transport_sas.o obj-$(CONFIG_ISCSI_TCP) += libiscsi.o iscsi_tcp.o +obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o obj-$(CONFIG_SCSI_AMIGA7XX) += amiga7xx.o 53c7xx.o obj-$(CONFIG_A3000_SCSI) += a3000.o wd33c93.o obj-$(CONFIG_A2091_SCSI) += a2091.o wd33c93.o diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h new file mode 100644 index 0000000..63381e1 --- /dev/null +++ b/include/rdma/rdma_user_cm.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_USER_CM_H +#define RDMA_USER_CM_H + +#include +#include +#include +#include + +#define RDMA_USER_CM_ABI_VERSION 1 + +#define RDMA_MAX_PRIVATE_DATA 256 + +enum { + RDMA_USER_CM_CMD_CREATE_ID, + RDMA_USER_CM_CMD_DESTROY_ID, + RDMA_USER_CM_CMD_BIND_ADDR, + RDMA_USER_CM_CMD_RESOLVE_ADDR, + RDMA_USER_CM_CMD_RESOLVE_ROUTE, + RDMA_USER_CM_CMD_QUERY_ROUTE, + RDMA_USER_CM_CMD_CONNECT, + RDMA_USER_CM_CMD_LISTEN, + RDMA_USER_CM_CMD_ACCEPT, + RDMA_USER_CM_CMD_REJECT, + RDMA_USER_CM_CMD_DISCONNECT, + RDMA_USER_CM_CMD_INIT_QP_ATTR, + RDMA_USER_CM_CMD_GET_EVENT +}; + +/* + * command ABI structures. + */ +struct rdma_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct rdma_ucm_create_id { + __u64 uid; + __u64 response; +}; + +struct rdma_ucm_create_id_resp { + __u32 id; +}; + +struct rdma_ucm_destroy_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_destroy_id_resp { + __u32 events_reported; +}; + +struct rdma_ucm_bind_addr { + __u64 response; + struct sockaddr_in6 addr; + __u32 id; +}; + +struct rdma_ucm_resolve_addr { + struct sockaddr_in6 src_addr; + struct sockaddr_in6 dst_addr; + __u32 id; + __u32 timeout_ms; +}; + +struct rdma_ucm_resolve_route { + __u32 id; + __u32 timeout_ms; +}; + +struct rdma_ucm_query_route { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_query_route_resp { + __u64 node_guid; + struct ib_user_path_rec ib_route[2]; + struct sockaddr_in6 src_addr; + struct sockaddr_in6 dst_addr; + __u32 num_paths; + __u8 port_num; + __u8 reserved[3]; +}; + +struct rdma_ucm_conn_param { + __u32 qp_num; + __u32 qp_type; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; + __u8 private_data_len; + __u8 srq; + __u8 responder_resources; + __u8 initiator_depth; + __u8 flow_control; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 valid; +}; + +struct rdma_ucm_connect { + struct rdma_ucm_conn_param conn_param; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_listen { + __u32 id; + __u32 backlog; +}; + +struct rdma_ucm_accept { + __u64 uid; + struct rdma_ucm_conn_param conn_param; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_reject { + __u32 id; + __u8 private_data_len; + __u8 reserved[3]; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; +}; + +struct rdma_ucm_disconnect { + __u32 id; +}; + +struct rdma_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct rdma_ucm_get_event { + __u64 response; +}; + +struct rdma_ucm_event_resp { + __u64 uid; + __u32 id; + __u32 event; + __u32 status; + __u8 private_data_len; + __u8 reserved[3]; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; +}; + +#endif /* RDMA_USER_CM_H */