/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context"); static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain"); static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit, struct amdiommu_domain *domain); static struct amdiommu_dte * amdiommu_get_dtep(struct amdiommu_ctx *ctx) { return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]); } void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, bool cansleep) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; domain = IODOM2DOM(entry->domain); unit = DOM2AMD(domain); /* * If "free" is false, then the IOTLB invalidation must be performed * synchronously. Otherwise, the caller might free the entry before * dmar_qi_task() is finished processing it. */ if (free) { AMDIOMMU_LOCK(unit); iommu_qi_invalidate_locked(&domain->iodom, entry, true); AMDIOMMU_UNLOCK(unit); } else { iommu_qi_invalidate_sync(&domain->iodom, entry->start, entry->end - entry->start, cansleep); iommu_domain_free_entry(entry, false); } } static bool amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain, struct iommu_map_entry *entry) { return (true); /* XXXKIB */ } void amdiommu_domain_unload(struct iommu_domain *iodom, struct iommu_map_entries_tailq *entries, bool cansleep) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; struct iommu_map_entry *entry, *entry1; int error __diagused; domain = IODOM2DOM(iodom); unit = DOM2AMD(domain); TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, ("not mapped entry %p %p", domain, entry)); error = iodom->ops->unmap(iodom, entry, cansleep ? IOMMU_PGF_WAITOK : 0); KASSERT(error == 0, ("unmap %p error %d", domain, error)); } if (TAILQ_EMPTY(entries)) return; AMDIOMMU_LOCK(unit); while ((entry = TAILQ_FIRST(entries)) != NULL) { TAILQ_REMOVE(entries, entry, dmamap_link); iommu_qi_invalidate_locked(&domain->iodom, entry, amdiommu_domain_unload_emit_wait(domain, entry)); } AMDIOMMU_UNLOCK(unit); } static void amdiommu_domain_destroy(struct amdiommu_domain *domain) { struct iommu_domain *iodom; struct amdiommu_unit *unit; iodom = DOM2IODOM(domain); KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries), ("unfinished unloads %p", domain)); KASSERT(LIST_EMPTY(&iodom->contexts), ("destroying dom %p with contexts", domain)); KASSERT(domain->ctx_cnt == 0, ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); KASSERT(domain->refs == 0, ("destroying dom %p with refs %d", domain, domain->refs)); if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) { AMDIOMMU_DOMAIN_LOCK(domain); iommu_gas_fini_domain(iodom); AMDIOMMU_DOMAIN_UNLOCK(domain); } if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) { if (domain->pgtbl_obj != NULL) AMDIOMMU_DOMAIN_PGLOCK(domain); amdiommu_domain_free_pgtbl(domain); } iommu_domain_fini(iodom); unit = DOM2AMD(domain); free_unr(unit->domids, domain->domain); free(domain, M_AMDIOMMU_DOMAIN); } static iommu_gaddr_t lvl2addr(int lvl) { int x; x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl; /* Level 6 has only 8 bits for page table index */ if (x >= NBBY * sizeof(uint64_t)) return (-1ull); return (1ull < (1ull << x)); } static void amdiommu_domain_init_pglvl(struct amdiommu_unit *unit, struct amdiommu_domain *domain) { iommu_gaddr_t end; int hats, i; uint64_t efr_hats; end = DOM2IODOM(domain)->end; for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) { if (lvl2addr(i) >= end && lvl2addr(i - 1) < end) break; } domain->pglvl = i; efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK; switch (efr_hats) { case AMDIOMMU_EFR_HATS_6LVL: hats = 6; break; case AMDIOMMU_EFR_HATS_5LVL: hats = 5; break; case AMDIOMMU_EFR_HATS_4LVL: hats = 4; break; default: printf("amdiommu%d: HATS %#jx (reserved) ignoring\n", unit->iommu.unit, (uintmax_t)efr_hats); return; } if (hats >= domain->pglvl) return; printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n", unit->iommu.unit, domain->domain, hats, domain->pglvl); domain->pglvl = hats; domain->iodom.end = lvl2addr(hats); } static struct amdiommu_domain * amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped) { struct amdiommu_domain *domain; struct iommu_domain *iodom; int error, id; id = alloc_unr(unit->domids); if (id == -1) return (NULL); domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO); iodom = DOM2IODOM(domain); domain->domain = id; LIST_INIT(&iodom->contexts); iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops); domain->unit = unit; domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR; amdiommu_domain_init_pglvl(unit, domain); iommu_gas_init_domain(DOM2IODOM(domain)); if (id_mapped) { domain->iodom.flags |= IOMMU_DOMAIN_IDMAP; } else { error = amdiommu_domain_alloc_pgtbl(domain); if (error != 0) goto fail; /* Disable local apic region access */ error = iommu_gas_reserve_region(iodom, 0xfee00000, 0xfeefffff + 1, &iodom->msi_entry); if (error != 0) goto fail; } return (domain); fail: amdiommu_domain_destroy(domain); return (NULL); } static struct amdiommu_ctx * amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid) { struct amdiommu_ctx *ctx; ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); ctx->context.domain = DOM2IODOM(domain); ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); ctx->context.rid = rid; ctx->context.refs = 1; return (ctx); } static void amdiommu_ctx_link(struct amdiommu_ctx *ctx) { struct amdiommu_domain *domain; domain = CTX2DOM(ctx); IOMMU_ASSERT_LOCKED(domain->iodom.iommu); KASSERT(domain->refs >= domain->ctx_cnt, ("dom %p ref underflow %d %d", domain, domain->refs, domain->ctx_cnt)); domain->refs++; domain->ctx_cnt++; LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link); } static void amdiommu_ctx_unlink(struct amdiommu_ctx *ctx) { struct amdiommu_domain *domain; domain = CTX2DOM(ctx); IOMMU_ASSERT_LOCKED(domain->iodom.iommu); KASSERT(domain->refs > 0, ("domain %p ctx dtr refs %d", domain, domain->refs)); KASSERT(domain->ctx_cnt >= domain->refs, ("domain %p ctx dtr refs %d ctx_cnt %d", domain, domain->refs, domain->ctx_cnt)); domain->refs--; domain->ctx_cnt--; LIST_REMOVE(&ctx->context, link); } struct amdiommu_ctx * amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid) { struct amdiommu_domain *domain; struct iommu_ctx *ctx; AMDIOMMU_ASSERT_LOCKED(unit); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->iodom.contexts, link) { if (ctx->rid == rid) return (IOCTX2CTX(ctx)); } } return (NULL); } struct amdiommu_domain * amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid) { struct amdiommu_domain *domain; struct iommu_ctx *ctx; AMDIOMMU_LOCK(unit); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->iodom.contexts, link) { if (ctx->rid == rid) break; } } AMDIOMMU_UNLOCK(unit); return (domain); } static void amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx) { struct amdiommu_dte *dtep; struct amdiommu_domain *domain; AMDIOMMU_ASSERT_LOCKED(unit); KASSERT(ctx->context.refs >= 1, ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs)); /* * If our reference is not last, only the dereference should * be performed. */ if (ctx->context.refs > 1) { ctx->context.refs--; AMDIOMMU_UNLOCK(unit); return; } KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0, ("lost ref on disabled ctx %p", ctx)); /* * Otherwise, the device table entry must be cleared before * the page table is destroyed. */ dtep = amdiommu_get_dtep(ctx); dtep->v = 0; atomic_thread_fence_rel(); memset(dtep, 0, sizeof(*dtep)); domain = CTX2DOM(ctx); amdiommu_qi_invalidate_ctx_locked_nowait(ctx); amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid); amdiommu_qi_invalidate_all_pages_locked_nowait(domain); amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); if (unit->irte_enabled) amdiommu_ctx_fini_irte(ctx); amdiommu_ctx_unlink(ctx); free(ctx->context.tag, M_AMDIOMMU_CTX); free(ctx, M_AMDIOMMU_CTX); amdiommu_unref_domain_locked(unit, domain); } static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit, struct amdiommu_domain *domain) { AMDIOMMU_ASSERT_LOCKED(unit); KASSERT(domain->refs >= 1, ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain, domain->refs)); KASSERT(domain->refs > domain->ctx_cnt, ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit, domain, domain->refs, domain->ctx_cnt)); if (domain->refs > 1) { domain->refs--; AMDIOMMU_UNLOCK(unit); return; } LIST_REMOVE(domain, link); AMDIOMMU_UNLOCK(unit); taskqueue_drain(unit->iommu.delayed_taskqueue, &domain->iodom.unload_task); amdiommu_domain_destroy(domain); } static void dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx, vm_page_t pgtblr, uint8_t dte, uint32_t edte) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; domain = CTX2DOM(ctx); unit = DOM2AMD(domain); dtep->tv = 1; /* dtep->had not used for now */ dtep->ir = 1; dtep->iw = 1; dtep->domainid = domain->domain; dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS; /* fill device interrupt passing hints from IVHD. */ dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0; dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0; dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0; dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4; dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0; dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0; if (unit->irte_enabled) { dtep->iv = 1; dtep->i = 0; dtep->inttablen = ilog2_local(unit->irte_nentries); dtep->intrroot = pmap_kextract(unit->irte_x2apic ? (vm_offset_t)ctx->irtx2 : (vm_offset_t)ctx->irtb) >> 6; dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP; } if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) { dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1; } else { MPASS(domain->pglvl > 0 && domain->pglvl <= AMDIOMMU_PGTBL_MAXLVL); dtep->pgmode = domain->pglvl; dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12; } atomic_thread_fence_rel(); dtep->v = 1; } static void dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte) { struct amdiommu_dte *dtep; struct amdiommu_unit *unit; struct amdiommu_domain *domain; int i; domain = CTX2DOM(ctx); unit = DOM2AMD(domain); dtep = amdiommu_get_dtep(ctx); KASSERT(dtep->v == 0, ("amdiommu%d initializing valid dte @%p %#jx", CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep))); if (iommu_is_buswide_ctx(AMD2IOMMU(unit), PCI_RID2BUS(ctx->context.rid))) { MPASS(!move); for (i = 0; i <= PCI_BUSMAX; i++) { dte_entry_init_one(&dtep[i], ctx, domain->pgtblr, dte, edte); } } else { dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte); } } struct amdiommu_ctx * amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid, int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte) { struct amdiommu_domain *domain, *domain1; struct amdiommu_ctx *ctx, *ctx1; int bus, slot, func; if (dev != NULL) { bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); } else { bus = PCI_RID2BUS(rid); slot = PCI_RID2SLOT(rid); func = PCI_RID2FUNC(rid); } AMDIOMMU_LOCK(unit); KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) || (slot == 0 && func == 0), ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit, bus, slot, func)); ctx = amdiommu_find_ctx_locked(unit, rid); if (ctx == NULL) { /* * Perform the allocations which require sleep or have * higher chance to succeed if the sleep is allowed. */ AMDIOMMU_UNLOCK(unit); domain1 = amdiommu_domain_alloc(unit, id_mapped); if (domain1 == NULL) return (NULL); if (!id_mapped) { /* * XXXKIB IVMD seems to be less significant * and less used on AMD than RMRR on Intel. * Not implemented for now. */ } ctx1 = amdiommu_ctx_alloc(domain1, rid); amdiommu_ctx_init_irte(ctx1); AMDIOMMU_LOCK(unit); /* * Recheck the contexts, other thread might have * already allocated needed one. */ ctx = amdiommu_find_ctx_locked(unit, rid); if (ctx == NULL) { domain = domain1; ctx = ctx1; amdiommu_ctx_link(ctx); ctx->context.tag->owner = dev; iommu_device_tag_init(CTX2IOCTX(ctx), dev); LIST_INSERT_HEAD(&unit->domains, domain, link); dte_entry_init(ctx, false, dte, edte); amdiommu_qi_invalidate_ctx_locked(ctx); if (dev != NULL) { device_printf(dev, "amdiommu%d pci%d:%d:%d:%d rid %x domain %d " "%s-mapped\n", AMD2IOMMU(unit)->unit, unit->unit_dom, bus, slot, func, rid, domain->domain, id_mapped ? "id" : "re"); } } else { amdiommu_domain_destroy(domain1); /* Nothing needs to be done to destroy ctx1. */ free(ctx1, M_AMDIOMMU_CTX); domain = CTX2DOM(ctx); ctx->context.refs++; /* tag referenced us */ } } else { domain = CTX2DOM(ctx); if (ctx->context.tag->owner == NULL) ctx->context.tag->owner = dev; ctx->context.refs++; /* tag referenced us */ } AMDIOMMU_UNLOCK(unit); return (ctx); } struct iommu_ctx * amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) { struct amdiommu_unit *unit; struct amdiommu_ctx *ret; int error; uint32_t edte; uint16_t rid1; uint8_t dte; error = amdiommu_find_unit(dev, &unit, &rid1, &dte, &edte, bootverbose); if (error != 0) return (NULL); if (AMD2IOMMU(unit) != iommu) /* XXX complain loudly */ return (NULL); ret = amdiommu_get_ctx_for_dev(unit, dev, rid1, pci_get_domain(dev), id_mapped, rmrr_init, dte, edte); return (CTX2IOCTX(ret)); } void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu, struct iommu_ctx *context) { struct amdiommu_unit *unit; struct amdiommu_ctx *ctx; unit = IOMMU2AMD(iommu); ctx = IOCTX2CTX(context); amdiommu_free_ctx_locked(unit, ctx); }