/*- * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef ICL_ISER_H #define ICL_ISER_H /* * iSCSI Common Layer for RDMA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ISER_DBG(X, ...) \ do { \ if (unlikely(iser_debug > 2)) \ printf("DEBUG: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } while (0) #define ISER_INFO(X, ...) \ do { \ if (unlikely(iser_debug > 1)) \ printf("INFO: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } while (0) #define ISER_WARN(X, ...) \ do { \ if (unlikely(iser_debug > 0)) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define ISER_ERR(X, ...) \ printf("ERROR: %s: " X "\n", __func__, ## __VA_ARGS__) #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 #define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL #define ISER_BEACON_WRID 0xfffffffffffffffeULL #define SHIFT_4K 12 #define SIZE_4K (1ULL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISER_DEF_XMIT_CMDS_MAX 256 /* the max RX (recv) WR supported by the iSER QP is defined by * * max_recv_wr = commands_max + recv_beacon */ #define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX + 1) #define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2) /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ #define ISER_MAX_RX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1) */ /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * * to have at max for SCSI command. The tx posting & completion handling code * * supports -EAGAIN scheme where tx is suspended till the QP has room for more * * send WR. D=8 comes from 64K/8K */ #define ISER_INFLIGHT_DATAOUTS 8 /* the send_beacon increase the max_send_wr by 1 */ #define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ (1 + ISER_INFLIGHT_DATAOUTS) + \ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS + 1) #define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr \ - ISER_MAX_TX_MISC_PDUS \ - ISER_MAX_RX_MISC_PDUS - 1) / \ (1 + ISER_INFLIGHT_DATAOUTS)) #define ISER_WC_BATCH_COUNT 16 #define ISER_SIGNAL_CMD_COUNT 32 /* Maximal QP's recommended per CQ. In case we use more QP's per CQ we might * * encounter a CQ overrun state. */ #define ISCSI_ISER_MAX_CONN 8 #define ISER_MAX_RX_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) #define ISER_MAX_TX_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) #define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ ISCSI_ISER_MAX_CONN) #define ISER_ZBVA_NOT_SUPPORTED 0x80 #define ISER_SEND_W_INV_NOT_SUPPORTED 0x40 #define ISCSI_DEF_MAX_RECV_SEG_LEN 8192 #define ISCSI_OPCODE_MASK 0x3f #define icl_to_iser_conn(ic) \ container_of(ic, struct iser_conn, icl_conn) #define icl_to_iser_pdu(ip) \ container_of(ip, struct icl_iser_pdu, icl_pdu) /** * struct iser_hdr - iSER header * * @flags: flags support (zbva, remote_inv) * @rsvd: reserved * @write_stag: write rkey * @write_va: write virtual address * @reaf_stag: read rkey * @read_va: read virtual address */ struct iser_hdr { u8 flags; u8 rsvd[3]; __be32 write_stag; __be64 write_va; __be32 read_stag; __be64 read_va; } __attribute__((packed)); struct iser_cm_hdr { u8 flags; u8 rsvd[3]; } __packed; /* Constant PDU lengths calculations */ #define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + ISCSI_BHS_SIZE) #define ISER_RECV_DATA_SEG_LEN 128 #define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN) #define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN) enum iser_conn_state { ISER_CONN_INIT, /* descriptor allocd, no conn */ ISER_CONN_PENDING, /* in the process of being established */ ISER_CONN_UP, /* up and running */ ISER_CONN_TERMINATING, /* in the process of being terminated */ ISER_CONN_DOWN, /* shut down */ ISER_CONN_STATES_NUM }; enum iser_task_status { ISER_TASK_STATUS_INIT = 0, ISER_TASK_STATUS_STARTED, ISER_TASK_STATUS_COMPLETED }; enum iser_data_dir { ISER_DIR_IN = 0, /* to initiator */ ISER_DIR_OUT, /* from initiator */ ISER_DIRS_NUM }; /** * struct iser_mem_reg - iSER memory registration info * * @sge: memory region sg element * @rkey: memory region remote key * @mem_h: pointer to registration context (FMR/Fastreg) */ struct iser_mem_reg { struct ib_sge sge; u32 rkey; void *mem_h; }; enum iser_desc_type { ISCSI_TX_CONTROL , ISCSI_TX_SCSI_COMMAND, ISCSI_TX_DATAOUT }; /** * struct iser_data_buf - iSER data buffer * * @sg: pointer to the sg list * @size: num entries of this sg * @data_len: total beffer byte len * @dma_nents: returned by dma_map_sg * @copy_buf: allocated copy buf for SGs unaligned * for rdma which are copied * @orig_sg: pointer to the original sg list (in case * we used a copy) * @sg_single: SG-ified clone of a non SG SC or * unaligned SG */ struct iser_data_buf { struct scatterlist sgl[ISCSI_ISER_SG_TABLESIZE]; void *sg; int size; unsigned long data_len; unsigned int dma_nents; char *copy_buf; struct scatterlist *orig_sg; struct scatterlist sg_single; }; /* fwd declarations */ struct iser_conn; struct ib_conn; struct iser_device; /** * struct iser_tx_desc - iSER TX descriptor (for send wr_id) * * @iser_header: iser header * @iscsi_header: iscsi header (bhs) * @type: command/control/dataout * @dma_addr: header buffer dma_address * @tx_sg: sg[0] points to iser/iscsi headers * sg[1] optionally points to either of immediate data * unsolicited data-out or control * @num_sge: number sges used on this TX task * @mapped: indicates if the descriptor is dma mapped */ struct iser_tx_desc { struct iser_hdr iser_header; struct iscsi_bhs iscsi_header __attribute__((packed)); enum iser_desc_type type; u64 dma_addr; struct ib_sge tx_sg[2]; int num_sge; bool mapped; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ sizeof(u64) + sizeof(struct ib_sge))) /** * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) * * @iser_header: iser header * @iscsi_header: iscsi header * @data: received data segment * @dma_addr: receive buffer dma address * @rx_sg: ib_sge of receive buffer * @pad: for sense data TODO: Modify to maximum sense length supported */ struct iser_rx_desc { struct iser_hdr iser_header; struct iscsi_bhs iscsi_header; char data[ISER_RECV_DATA_SEG_LEN]; u64 dma_addr; struct ib_sge rx_sg; char pad[ISER_RX_PAD_SIZE]; } __attribute__((packed)); struct icl_iser_pdu { struct icl_pdu icl_pdu; struct iser_tx_desc desc; struct iser_conn *iser_conn; enum iser_task_status status; struct ccb_scsiio *csio; int command_sent; int dir[ISER_DIRS_NUM]; struct iser_mem_reg rdma_reg[ISER_DIRS_NUM]; struct iser_data_buf data[ISER_DIRS_NUM]; }; /** * struct iser_comp - iSER completion context * * @device: pointer to device handle * @cq: completion queue * @wcs: work completion array * @tq: taskqueue handle * @task: task to run task_fn * @active_qps: Number of active QPs attached * to completion context */ struct iser_comp { struct iser_device *device; struct ib_cq *cq; struct ib_wc wcs[ISER_WC_BATCH_COUNT]; struct taskqueue *tq; struct task task; int active_qps; }; /** * struct iser_device - iSER device handle * * @ib_device: RDMA device * @pd: Protection Domain for this device * @dev_attr: Device attributes container * @mr: Global DMA memory region * @event_handler: IB events handle routine * @ig_list: entry in devices list * @refcount: Reference counter, dominated by open iser connections * @comps_used: Number of completion contexts used, Min between online * cpus and device max completion vectors * @comps: Dinamically allocated array of completion handlers */ struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; struct ib_device_attr dev_attr; struct ib_mr *mr; struct ib_event_handler event_handler; struct list_head ig_list; int refcount; int comps_used; struct iser_comp *comps; }; /** * struct iser_reg_resources - Fast registration recources * * @mr: memory region * @mr_valid: is mr valid indicator */ struct iser_reg_resources { struct ib_mr *mr; u8 mr_valid:1; }; /** * struct fast_reg_descriptor - Fast registration descriptor * * @list: entry in connection fastreg pool * @rsc: data buffer registration resources */ struct fast_reg_descriptor { struct list_head list; struct iser_reg_resources rsc; }; /** * struct iser_beacon - beacon to signal all flush errors were drained * * @send: send wr * @recv: recv wr * @flush_lock: protects flush_cv * @flush_cv: condition variable for beacon flush */ struct iser_beacon { union { struct ib_send_wr send; struct ib_recv_wr recv; }; struct mtx flush_lock; struct cv flush_cv; }; /** * struct ib_conn - Infiniband related objects * * @cma_id: rdma_cm connection maneger handle * @qp: Connection Queue-pair * @device: reference to iser device * @comp: iser completion context */ struct ib_conn { struct rdma_cm_id *cma_id; struct ib_qp *qp; int post_recv_buf_count; u8 sig_count; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_comp *comp; struct iser_beacon beacon; struct mtx lock; union { struct { struct ib_fmr_pool *pool; struct iser_page_vec *page_vec; } fmr; struct { struct list_head pool; int pool_size; } fastreg; }; }; struct iser_conn { struct icl_conn icl_conn; struct ib_conn ib_conn; struct cv up_cv; struct list_head conn_list; struct sx state_mutex; enum iser_conn_state state; int qp_max_recv_dtos; int min_posted_rx; u16 max_cmds; char *login_buf; char *login_req_buf, *login_resp_buf; u64 login_req_dma, login_resp_dma; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; u32 num_rx_descs; bool handoff_done; }; /** * struct iser_global: iSER global context * * @device_list_mutex: protects device_list * @device_list: iser devices global list * @connlist_mutex: protects connlist * @connlist: iser connections global list * @desc_cache: kmem cache for tx dataout * @close_conns_mutex: serializes conns closure */ struct iser_global { struct sx device_list_mutex; struct list_head device_list; struct mtx connlist_mutex; struct list_head connlist; struct sx close_conns_mutex; }; extern struct iser_global ig; extern int iser_debug; void iser_create_send_desc(struct iser_conn *, struct iser_tx_desc *); int iser_post_recvl(struct iser_conn *); int iser_post_recvm(struct iser_conn *, int); int iser_alloc_login_buf(struct iser_conn *iser_conn); void iser_free_login_buf(struct iser_conn *iser_conn); int iser_post_send(struct ib_conn *, struct iser_tx_desc *, bool); void iser_snd_completion(struct iser_tx_desc *, struct ib_conn *); void iser_rcv_completion(struct iser_rx_desc *, unsigned long, struct ib_conn *); void iser_pdu_free(struct icl_conn *, struct icl_pdu *); struct icl_pdu * iser_new_pdu(struct icl_conn *ic, int flags); int iser_alloc_rx_descriptors(struct iser_conn *, int); void iser_free_rx_descriptors(struct iser_conn *); int iser_initialize_headers(struct icl_iser_pdu *, struct iser_conn *); int iser_send_control(struct iser_conn *, struct icl_iser_pdu *); int iser_send_command(struct iser_conn *, struct icl_iser_pdu *); int iser_reg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir); void iser_unreg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir); int iser_create_fastreg_pool(struct ib_conn *, unsigned); void iser_free_fastreg_pool(struct ib_conn *); int iser_dma_map_task_data(struct icl_iser_pdu *, struct iser_data_buf *, enum iser_data_dir, enum dma_data_direction); int iser_conn_terminate(struct iser_conn *); void iser_free_ib_conn_res(struct iser_conn *, bool); void iser_dma_unmap_task_data(struct icl_iser_pdu *, struct iser_data_buf *, enum dma_data_direction); int iser_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *); #endif /* !ICL_ISER_H */