/*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include /* Amount of space available for write */ #define VMBUS_BR_WAVAIL(r, w, z) \ (((w) >= (r)) ? 
((z) - ((w) - (r))) : ((r) - (w))) /* Increase bufing index */ #define VMBUS_BR_IDXINC(idx, inc, sz) (((idx) + (inc)) % (sz)) static int vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS); static int vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS); static void vmbus_br_setup(struct vmbus_br *, void *, int); static int vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS) { const struct vmbus_br *br = arg1; uint32_t rindex, windex, imask, psndsz, fvalue, ravail, wavail; uint64_t intrcnt; char state[256]; intrcnt = br->vbr_intrcnt; rindex = br->vbr_rindex; windex = br->vbr_windex; imask = br->vbr_imask; psndsz = br->vbr_psndsz; fvalue = br->vbr_fvalue; wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize); ravail = br->vbr_dsize - wavail; snprintf(state, sizeof(state), "intrcnt:%ju rindex:%u windex:%u imask:%u psndsz:%u fvalue:%u " "ravail:%u wavail:%u", (uintmax_t)intrcnt, rindex, windex, imask, psndsz, fvalue, ravail, wavail); return sysctl_handle_string(oidp, state, sizeof(state), req); } /* * Binary bufring states. 
*/ static int vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS) { #define BR_STATE_RIDX 0 #define BR_STATE_WIDX 1 #define BR_STATE_IMSK 2 #define BR_STATE_PSSZ 3 #define BR_STATE_FVAL 4 #define BR_STATE_RSPC 5 #define BR_STATE_WSPC 6 #define BR_STATE_MAX 7 const struct vmbus_br *br = arg1; uint32_t rindex, windex, wavail, state[BR_STATE_MAX]; rindex = br->vbr_rindex; windex = br->vbr_windex; wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize); state[BR_STATE_RIDX] = rindex; state[BR_STATE_WIDX] = windex; state[BR_STATE_IMSK] = br->vbr_imask; state[BR_STATE_PSSZ] = br->vbr_psndsz; state[BR_STATE_FVAL] = br->vbr_fvalue; state[BR_STATE_WSPC] = wavail; state[BR_STATE_RSPC] = br->vbr_dsize - wavail; return sysctl_handle_opaque(oidp, state, sizeof(state), req); } void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree, struct vmbus_br *br, const char *name) { struct sysctl_oid *tree; char desc[64]; tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (tree == NULL) return; snprintf(desc, sizeof(desc), "%s state", name); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, br, 0, vmbus_br_sysctl_state, "A", desc); snprintf(desc, sizeof(desc), "%s binary state", name); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin", CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, br, 0, vmbus_br_sysctl_state_bin, "IU", desc); } void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr) { rbr->rxbr_imask = 1; mb(); } static __inline uint32_t vmbus_rxbr_avail(const struct vmbus_rxbr *rbr) { uint32_t rindex, windex; /* Get snapshot */ rindex = rbr->rxbr_rindex; windex = rbr->rxbr_windex; return (rbr->rxbr_dsize - VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize)); } uint32_t vmbus_rxbr_available(const struct vmbus_rxbr *rbr) { return (vmbus_rxbr_avail(rbr)); } uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr) { rbr->rxbr_imask = 0; mb(); /* 
* Now check to see if the ring buffer is still empty. * If it is not, we raced and we need to process new * incoming channel packets. */ return vmbus_rxbr_avail(rbr); } static void vmbus_br_setup(struct vmbus_br *br, void *buf, int blen) { br->vbr = buf; br->vbr_dsize = blen - sizeof(struct vmbus_bufring); } void vmbus_rxbr_init(struct vmbus_rxbr *rbr) { mtx_init(&rbr->rxbr_lock, "vmbus_rxbr", NULL, MTX_SPIN); } void vmbus_rxbr_deinit(struct vmbus_rxbr *rbr) { mtx_destroy(&rbr->rxbr_lock); } void vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen) { vmbus_br_setup(&rbr->rxbr, buf, blen); } static __inline boolean_t vmbus_rxbr_need_signal(const struct vmbus_rxbr *rbr, uint32_t bytes_read) { uint32_t pending_snd_sz, canwrite_size; /* No need to signal if host doesn't want us to */ if (!rbr->rxbr_fpsndsz) return false; mb(); pending_snd_sz = rbr->rxbr_psndsz; /* No need to signal if host sets pending_snd_sz to 0 */ if (!pending_snd_sz) return false; mb(); canwrite_size = rbr->rxbr_dsize - vmbus_rxbr_avail(rbr); /* No need to signal if br already has enough space before read */ if (canwrite_size - bytes_read > pending_snd_sz) return false; /* * No need to signal if still doesn't have enough space * asked by host */ if (canwrite_size <= pending_snd_sz) return false; return true; } void vmbus_txbr_init(struct vmbus_txbr *tbr) { mtx_init(&tbr->txbr_lock, "vmbus_txbr", NULL, MTX_SPIN); } void vmbus_txbr_deinit(struct vmbus_txbr *tbr) { mtx_destroy(&tbr->txbr_lock); } void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen) { vmbus_br_setup(&tbr->txbr, buf, blen); /* Set feature bit enabling flow control */ tbr->txbr_fpsndsz = 1; } uint32_t vmbus_txbr_get_imask(const struct vmbus_txbr *tbr) { mb(); return(tbr->txbr_imask); } void vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr, uint32_t size) { tbr->txbr_psndsz = size; } /* * When we write to the ring buffer, check if the host needs to be * signaled. 
* * The contract: * - The host guarantees that while it is draining the TX bufring, * it will set the br_imask to indicate it does not need to be * interrupted when new data are added. * - The host guarantees that it will completely drain the TX bufring * before exiting the read loop. Further, once the TX bufring is * empty, it will clear the br_imask and re-check to see if new * data have arrived. */ static __inline boolean_t vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex) { mb(); if (tbr->txbr_imask) return (FALSE); __compiler_membar(); /* * This is the only case we need to signal when the * ring transitions from being empty to non-empty. */ if (old_windex == tbr->txbr_rindex) return (TRUE); return (FALSE); } static __inline uint32_t vmbus_txbr_avail(const struct vmbus_txbr *tbr) { uint32_t rindex, windex; /* Get snapshot */ rindex = tbr->txbr_rindex; windex = tbr->txbr_windex; return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize); } static __inline uint32_t vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex, const void *src0, uint32_t cplen) { const uint8_t *src = src0; uint8_t *br_data = tbr->txbr_data; uint32_t br_dsize = tbr->txbr_dsize; if (cplen > br_dsize - windex) { uint32_t fraglen = br_dsize - windex; /* Wrap-around detected */ memcpy(br_data + windex, src, fraglen); memcpy(br_data, src + fraglen, cplen - fraglen); } else { memcpy(br_data + windex, src, cplen); } return VMBUS_BR_IDXINC(windex, cplen, br_dsize); } static __inline uint32_t vmbus_txbr_copyto_call(const struct vmbus_txbr *tbr, uint32_t windex, uint32_t cplen, vmbus_br_copy_callback_t cb, void *cbarg, int *ret) { uint8_t *br_data = tbr->txbr_data; uint32_t br_dsize = tbr->txbr_dsize; int err = 0; if (cplen > br_dsize - windex) { uint32_t fraglen = br_dsize - windex; /* Wrap-around detected */ err = cb((void *)(br_data + windex), fraglen, cbarg); if (!err) err = cb((void *)br_data, cplen - fraglen, cbarg); } else { err = cb((void *)(br_data + windex), 
cplen, cbarg); } *ret = err; return VMBUS_BR_IDXINC(windex, cplen, br_dsize); } uint32_t vmbus_txbr_available(const struct vmbus_txbr *tbr) { return (vmbus_txbr_avail(tbr)); } /* * NOTE: * Not holding lock when calling user provided callback routine. * Caller should hold lock to serialize ring buffer accesses. */ int vmbus_txbr_write_call(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen, vmbus_br_copy_callback_t cb, void *cbarg, boolean_t *need_sig) { uint32_t old_windex, windex, total; uint64_t save_windex; int i; int cb_ret = 0; total = 0; for (i = 0; i < iovlen; i++) total += iov[i].iov_len; total += sizeof(save_windex); /* * NOTE: * If this write is going to make br_windex same as br_rindex, * i.e. the available space for write is same as the write size, * we can't do it then, since br_windex == br_rindex means that * the bufring is empty. */ if (vmbus_txbr_avail(tbr) <= total) { return (EAGAIN); } /* Save br_windex for later use */ old_windex = tbr->txbr_windex; /* * Copy the scattered channel packet to the TX bufring. */ windex = old_windex; for (i = 0; i < iovlen; i++) { if (iov[i].iov_base != NULL) { windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len); } else if (cb != NULL) { windex = vmbus_txbr_copyto_call(tbr, windex, iov[i].iov_len, cb, cbarg, &cb_ret); /* * If callback fails, return without updating * write index. */ if (cb_ret) return (cb_ret); } } mtx_lock_spin(&tbr->txbr_lock); /* * Set the offset of the current channel packet. */ save_windex = ((uint64_t)old_windex) << 32; windex = vmbus_txbr_copyto(tbr, windex, &save_windex, sizeof(save_windex)); /* * Update the write index _after_ the channel packet * is copied. */ __compiler_membar(); tbr->txbr_windex = windex; mtx_unlock_spin(&tbr->txbr_lock); if (need_sig) *need_sig = vmbus_txbr_need_signal(tbr, old_windex); return (0); } /* * Write scattered channel packet to TX bufring. 
* * The offset of this channel packet is written as a 64bits value * immediately after this channel packet. */ int vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen, boolean_t *need_sig) { uint32_t old_windex, windex, total; uint64_t save_windex; int i; total = 0; for (i = 0; i < iovlen; i++) total += iov[i].iov_len; total += sizeof(save_windex); mtx_lock_spin(&tbr->txbr_lock); /* * NOTE: * If this write is going to make br_windex same as br_rindex, * i.e. the available space for write is same as the write size, * we can't do it then, since br_windex == br_rindex means that * the bufring is empty. */ if (vmbus_txbr_avail(tbr) <= total) { mtx_unlock_spin(&tbr->txbr_lock); return (EAGAIN); } /* Save br_windex for later use */ old_windex = tbr->txbr_windex; /* * Copy the scattered channel packet to the TX bufring. */ windex = old_windex; for (i = 0; i < iovlen; i++) { windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len); } /* * Set the offset of the current channel packet. */ save_windex = ((uint64_t)old_windex) << 32; windex = vmbus_txbr_copyto(tbr, windex, &save_windex, sizeof(save_windex)); /* * Update the write index _after_ the channel packet * is copied. */ __compiler_membar(); tbr->txbr_windex = windex; mtx_unlock_spin(&tbr->txbr_lock); *need_sig = vmbus_txbr_need_signal(tbr, old_windex); return (0); } static __inline uint32_t vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex, void *dst0, int cplen) { uint8_t *dst = dst0; const uint8_t *br_data = rbr->rxbr_data; uint32_t br_dsize = rbr->rxbr_dsize; if (cplen > br_dsize - rindex) { uint32_t fraglen = br_dsize - rindex; /* Wrap-around detected. 
*/ memcpy(dst, br_data + rindex, fraglen); memcpy(dst + fraglen, br_data, cplen - fraglen); } else { memcpy(dst, br_data + rindex, cplen); } return VMBUS_BR_IDXINC(rindex, cplen, br_dsize); } static __inline uint32_t vmbus_rxbr_copyfrom_call(const struct vmbus_rxbr *rbr, uint32_t rindex, int cplen, vmbus_br_copy_callback_t cb, void *cbarg) { uint8_t *br_data = rbr->rxbr_data; uint32_t br_dsize = rbr->rxbr_dsize; int error = 0; if (cplen > br_dsize - rindex) { uint32_t fraglen = br_dsize - rindex; /* Wrap-around detected. */ error = cb((void *)(br_data + rindex), fraglen, cbarg); if (!error) error = cb((void *)br_data, cplen - fraglen, cbarg); } else { error = cb((void *)(br_data + rindex), cplen, cbarg); } return (error); } int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen) { mtx_lock_spin(&rbr->rxbr_lock); /* * The requested data and the 64bits channel packet * offset should be there at least. */ if (vmbus_rxbr_avail(rbr) < dlen + sizeof(uint64_t)) { mtx_unlock_spin(&rbr->rxbr_lock); return (EAGAIN); } vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen); mtx_unlock_spin(&rbr->rxbr_lock); return (0); } /* * NOTE: * We only hold spin lock to check the ring buffer space. It is * released before calling user provided callback routine. * Caller should hold lock to serialize ring buffer accesses. */ int vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen, uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg) { uint32_t rindex, br_dsize0 = rbr->rxbr_dsize; int ret; mtx_lock_spin(&rbr->rxbr_lock); /* * The requested data + skip and the 64bits channel packet * offset should be there at least. */ if (vmbus_rxbr_avail(rbr) < skip + dlen + sizeof(uint64_t)) { mtx_unlock_spin(&rbr->rxbr_lock); return (EAGAIN); } rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize0); mtx_unlock_spin(&rbr->rxbr_lock); ret = vmbus_rxbr_copyfrom_call(rbr, rindex, dlen, cb, cbarg); return (ret); } /* * NOTE: * We assume idx_adv == sizeof(channel packet). 
*/
int
vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data, int dlen,
    uint32_t idx_adv, boolean_t *need_sig)
{
	/* Bytes to step over: the packet plus its 64bits offset. */
	const size_t step = idx_adv + sizeof(uint64_t);
	const uint32_t ring_size = rbr->rxbr_dsize;
	uint32_t next;

	mtx_lock_spin(&rbr->rxbr_lock);

	/*
	 * Make sure it has enough data to read.
	 */
	if (vmbus_rxbr_avail(rbr) < step + dlen) {
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}

	if (idx_adv > 0) {
		/*
		 * Advance the read index first, including the channel's
		 * 64bit previous write offset.
		 */
		next = VMBUS_BR_IDXINC(rbr->rxbr_rindex, step, ring_size);
		__compiler_membar();
		rbr->rxbr_rindex = next;
	}

	/* Peek dlen bytes at the (possibly advanced) read index. */
	vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);

	mtx_unlock_spin(&rbr->rxbr_lock);

	if (need_sig != NULL) {
		/* Only an actual advance can make a signal necessary. */
		*need_sig = (idx_adv > 0) ?
		    vmbus_rxbr_need_signal(rbr, step) : false;
	}

	return (0);
}

/*
 * NOTE:
 * Just update the RX rb index.
 *
 * Returns 0 on success or EAGAIN if fewer than idx_adv bytes plus the
 * 64bits offset are readable.  On success *need_sig (if need_sig is
 * not NULL) is set to the result of vmbus_rxbr_need_signal().
 */
int
vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
    boolean_t *need_sig)
{
	/* Bytes to step over: the packet plus its 64bits offset. */
	const size_t step = idx_adv + sizeof(uint64_t);
	const uint32_t ring_size = rbr->rxbr_dsize;
	uint32_t next;

	mtx_lock_spin(&rbr->rxbr_lock);

	/*
	 * Make sure it has enough space to advance.
	 */
	if (vmbus_rxbr_avail(rbr) < step) {
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}

	/*
	 * Advance the read index, including the channel's 64bit
	 * previous write offset.
	 */
	next = VMBUS_BR_IDXINC(rbr->rxbr_rindex, step, ring_size);
	__compiler_membar();
	rbr->rxbr_rindex = next;

	mtx_unlock_spin(&rbr->rxbr_lock);

	if (need_sig != NULL)
		*need_sig = vmbus_rxbr_need_signal(rbr, step);

	return (0);
}

/*
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
*/
int
vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip,
    boolean_t *need_sig)
{
	/* Bytes consumed: skipped part, copied part and the 64bits offset. */
	const size_t consumed = dlen + skip + sizeof(uint64_t);
	const uint32_t ring_size = rbr->rxbr_dsize;
	uint32_t pos;

	KASSERT(dlen + skip > 0, ("invalid dlen %d, offset %u", dlen, skip));

	mtx_lock_spin(&rbr->rxbr_lock);

	/* Bail out unless everything to be consumed is readable. */
	if (vmbus_rxbr_avail(rbr) < consumed) {
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}

	/*
	 * Copy channel packet from RX bufring, starting skip bytes
	 * past the current read index.
	 */
	pos = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, ring_size);
	pos = vmbus_rxbr_copyfrom(rbr, pos, data, dlen);

	/*
	 * Discard this channel packet's 64bits offset, which is useless
	 * to us.
	 */
	pos = VMBUS_BR_IDXINC(pos, sizeof(uint64_t), ring_size);

	/*
	 * Update the read index _after_ the channel packet is fetched.
	 */
	__compiler_membar();
	rbr->rxbr_rindex = pos;

	mtx_unlock_spin(&rbr->rxbr_lock);

	if (need_sig != NULL)
		*need_sig = vmbus_rxbr_need_signal(rbr, consumed);

	return (0);
}