/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018 Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#include <contrib/dev/acpica/include/acpi.h>

#include <dev/acpica/acpivar.h>

#include <x86/cputypes.h>

#include "acpi_if.h"
#include "cpufreq_if.h"

extern uint64_t	tsc_freq;

static void	intel_hwpstate_identify(driver_t *driver, device_t parent);
static int	intel_hwpstate_probe(device_t dev);
static int	intel_hwpstate_attach(device_t dev);
static int	intel_hwpstate_detach(device_t dev);
static int	intel_hwpstate_suspend(device_t dev);
static int	intel_hwpstate_resume(device_t dev);

static int	intel_hwpstate_get(device_t dev, struct cf_setting *cf);
static int	intel_hwpstate_type(device_t dev, int *type);

static device_method_t intel_hwpstate_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	intel_hwpstate_identify),
	DEVMETHOD(device_probe,		intel_hwpstate_probe),
	DEVMETHOD(device_attach,	intel_hwpstate_attach),
	DEVMETHOD(device_detach,	intel_hwpstate_detach),
	DEVMETHOD(device_suspend,	intel_hwpstate_suspend),
	DEVMETHOD(device_resume,	intel_hwpstate_resume),

	/* cpufreq interface */
	DEVMETHOD(cpufreq_drv_get,	intel_hwpstate_get),
	DEVMETHOD(cpufreq_drv_type,	intel_hwpstate_type),

	DEVMETHOD_END
};

struct hwp_softc {
	device_t	dev;
	bool		hwp_notifications;
	bool		hwp_activity_window;
	bool		hwp_pref_ctrl;
	bool		hwp_pkg_ctrl;
	bool		hwp_pkg_ctrl_en;
	bool		hwp_perf_bias;
	bool		hwp_perf_bias_cached;

	uint64_t	req;			/* Cached copy of HWP_REQUEST */
	uint64_t	hwp_energy_perf_bias;	/* Cache PERF_BIAS */

	uint8_t		high;
	uint8_t		guaranteed;
	uint8_t		efficient;
	uint8_t		low;
};

static devclass_t hwpstate_intel_devclass;
static driver_t hwpstate_intel_driver = {
	"hwpstate_intel",
	intel_hwpstate_methods,
	sizeof(struct hwp_softc),
};

DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver,
    hwpstate_intel_devclass, NULL, NULL);
MODULE_VERSION(hwpstate_intel, 1);

static bool hwpstate_pkg_ctrl_enable = true;
SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
    &hwpstate_pkg_ctrl_enable, 0,
    "Set 1 (default) to enable package-level control, 0 to disable");

static int
intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	device_t dev;
	struct pcpu *pc;
	struct sbuf *sb;
	struct hwp_softc *sc;
	uint64_t data, data2;
	int ret;

	sc = (struct hwp_softc *)arg1;
	dev = sc->dev;

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
	sbuf_putc(sb, '\n');

	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	rdmsr_safe(MSR_IA32_PM_ENABLE, &data);
	sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
	    ((data & 1) ? "En" : "Dis"));

	if (data == 0) {
		ret = 0;
		goto out;
	}

	rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data);
	sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff);
	sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff);
	sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff);
	sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff);

	rdmsr_safe(MSR_IA32_HWP_REQUEST, &data);
	data2 = 0;
	if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL))
		rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2);

	sbuf_putc(sb, '\n');

#define pkg_print(x, name, offset) do {					\
	if (!sc->hwp_pkg_ctrl || (data & x) != 0)			\
		sbuf_printf(sb, "\t%s: %03u\n", name,			\
		    (unsigned)(data >> offset) & 0xff);			\
	else								\
		sbuf_printf(sb, "\t%s: %03u\n", name,			\
		    (unsigned)(data2 >> offset) & 0xff);		\
} while (0)

	pkg_print(IA32_HWP_REQUEST_EPP_VALID,
	    "Requested Efficiency Performance Preference", 24);
	pkg_print(IA32_HWP_REQUEST_DESIRED_VALID,
	    "Requested Desired Performance", 16);
	pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID,
	    "Requested Maximum Performance", 8);
	pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID,
	    "Requested Minimum Performance", 0);
#undef pkg_print

	sbuf_putc(sb, '\n');

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);

	ret = sbuf_finish(sb);
	if (ret == 0)
		ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
	sbuf_delete(sb);

	return (ret);
}

static inline int
percent_to_raw(int x)
{

	MPASS(x <= 100 && x >= 0);
	return (0xff * x / 100);
}

/*
 * Given x * 10 in [0, 1000], round to the integer nearest x.
 *
 * This allows round-tripping nice human readable numbers through this
 * interface.  Otherwise, user-provided percentages such as 25, 50, 75 get
 * rounded down to 24, 49, and 74, which is a bit ugly.
 */
static inline int
round10(int xtimes10)
{

	return ((xtimes10 + 5) / 10);
}

static inline int
raw_to_percent(int x)
{

	MPASS(x <= 0xff && x >= 0);
	return (round10(x * 1000 / 0xff));
}

/* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */
static inline int
percent_to_raw_perf_bias(int x)
{

	/*
	 * Round up so that raw values present as nice round human numbers and
	 * also round-trip to the same raw value.
	 */
	MPASS(x <= 100 && x >= 0);
	return (((0xf * x) + 50) / 100);
}

static inline int
raw_to_percent_perf_bias(int x)
{

	/* Rounding to nice human numbers despite a step interval of 6.67%. */
	MPASS(x <= 0xf && x >= 0);
	return (((x * 20) / 0xf) * 5);
}

static int
sysctl_epp_select(SYSCTL_HANDLER_ARGS)
{
	struct hwp_softc *sc;
	device_t dev;
	struct pcpu *pc;
	uint64_t epb;
	uint32_t val;
	int ret;

	dev = oidp->oid_arg1;
	sc = device_get_softc(dev);
	if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias)
		return (ENODEV);

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	if (sc->hwp_pref_ctrl) {
		val = (sc->req &
		    IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24;
		val = raw_to_percent(val);
	} else {
		/*
		 * If cpuid indicates EPP is not supported, the HWP controller
		 * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4).
		 * This register is per-core (but not HT).
		 */
		if (!sc->hwp_perf_bias_cached) {
			ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb);
			if (ret)
				goto out;
			sc->hwp_energy_perf_bias = epb;
			sc->hwp_perf_bias_cached = true;
		}
		val = sc->hwp_energy_perf_bias &
		    IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK;
		val = raw_to_percent_perf_bias(val);
	}

	MPASS(val >= 0 && val <= 100);

	ret = sysctl_handle_int(oidp, &val, 0, req);
	if (ret || req->newptr == NULL)
		goto out;

	if (val > 100) {
		ret = EINVAL;
		goto out;
	}

	if (sc->hwp_pref_ctrl) {
		val = percent_to_raw(val);

		sc->req =
		    ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE)
		    | (val << 24u));

		if (sc->hwp_pkg_ctrl_en)
			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
		else
			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
	} else {
		val = percent_to_raw_perf_bias(val);
		MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0);

		sc->hwp_energy_perf_bias =
		    ((sc->hwp_energy_perf_bias &
		    ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val);
		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
		    sc->hwp_energy_perf_bias);
	}

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);

	return (ret);
}

static void
intel_hwpstate_identify(driver_t *driver, device_t parent)
{

	if (device_find_child(parent, "hwpstate_intel", -1) != NULL)
		return;

	if (cpu_vendor_id != CPU_VENDOR_INTEL)
		return;

	if (resource_disabled("hwpstate_intel", 0))
		return;

	/*
	 * Intel SDM 14.4.1 (HWP Programming Interfaces):
	 *   Availability of HWP baseline resource and capability,
	 *   CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new
	 *   architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
	 *   IA32_HWP_REQUEST, IA32_HWP_STATUS.
	 */
	if ((cpu_power_eax & CPUTPM1_HWP) == 0)
		return;

	if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel",
	    device_get_unit(parent)) == NULL)
		device_printf(parent, "hwpstate_intel: add child failed\n");
}

static int
intel_hwpstate_probe(device_t dev)
{

	device_set_desc(dev, "Intel Speed Shift");
	return (BUS_PROBE_NOWILDCARD);
}

static int
set_autonomous_hwp(struct hwp_softc *sc)
{
	struct pcpu *pc;
	device_t dev;
	uint64_t caps;
	int ret;

	dev = sc->dev;

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	/* XXX: Many MSRs aren't readable until feature is enabled */
	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
	if (ret) {
		/*
		 * This is actually a package-level MSR, and only the first
		 * write is not ignored.  So it is harmless to enable it across
		 * all devices, and this allows us not to care especially in
		 * which order cores (and packages) are probed.  This error
		 * condition should not happen given we gate on the HWP CPUID
		 * feature flag, if the Intel SDM is correct.
		 */
		device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req);
	if (ret) {
		device_printf(dev,
		    "Failed to read HWP request MSR for cpu%d (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps);
	if (ret) {
		device_printf(dev,
		    "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	/*
	 * High and low are static; "guaranteed" is dynamic; and efficient is
	 * also dynamic.
	 */
	sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps);
	sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps);
	sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps);
	sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps);

	/* hardware autonomous selection determines the performance target */
	sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE;

	/* enable HW dynamic selection of window size */
	sc->req &= ~IA32_HWP_ACTIVITY_WINDOW;

	/* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */
	sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE;
	sc->req |= sc->low;

	/* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */
	sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE;
	sc->req |= sc->high << 8;

	/* If supported, request package-level control for this CPU. */
	if (sc->hwp_pkg_ctrl_en)
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
	else
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
	if (ret) {
		device_printf(dev,
		    "Failed to set up%s autonomous HWP for cpu%d\n",
		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
		goto out;
	}

	/* If supported, write the PKG-wide control MSR. */
	if (sc->hwp_pkg_ctrl_en) {
		/*
		 * "The structure of the IA32_HWP_REQUEST_PKG MSR
		 * (package-level) is identical to the IA32_HWP_REQUEST MSR
		 * with the exception of the Package Control field, which does
		 * not exist." (Intel SDM §14.4.4)
		 */
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
		if (ret) {
			device_printf(dev,
			    "Failed to set autonomous HWP for package\n");
		}
	}

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);

	return (ret);
}

static int
intel_hwpstate_attach(device_t dev)
{
	struct hwp_softc *sc;
	int ret;

	sc = device_get_softc(dev);
	sc->dev = dev;

	/* eax */
	if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION)
		sc->hwp_notifications = true;
	if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW)
		sc->hwp_activity_window = true;
	if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF)
		sc->hwp_pref_ctrl = true;
	if (cpu_power_eax & CPUTPM1_HWP_PKG)
		sc->hwp_pkg_ctrl = true;

	/* Allow administrators to disable pkg-level control. */
	sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable);

	/* ecx */
	if (cpu_power_ecx & CPUID_PERF_BIAS)
		sc->hwp_perf_bias = true;

	ret = set_autonomous_hwp(sc);
	if (ret)
		return (ret);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev),
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
	    sc, 0, intel_hwp_dump_sysctl_handler, "A", "");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "epp",
	    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0,
	    sysctl_epp_select, "I",
	    "Efficiency/Performance Preference "
	    "(range from 0, most performant, through 100, most efficient)");

	return (cpufreq_register(dev));
}

static int
intel_hwpstate_detach(device_t dev)
{

	return (cpufreq_unregister(dev));
}

static int
intel_hwpstate_get(device_t dev, struct cf_setting *set)
{
	struct pcpu *pc;
	uint64_t rate;
	int ret;

	if (set == NULL)
		return (EINVAL);

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set));
	set->dev = dev;

	ret = cpu_est_clockrate(pc->pc_cpuid, &rate);
	if (ret == 0)
		set->freq = rate / 1000000;

	set->volts = CPUFREQ_VAL_UNKNOWN;
	set->power = CPUFREQ_VAL_UNKNOWN;
	set->lat = CPUFREQ_VAL_UNKNOWN;

	return (0);
}

static int
intel_hwpstate_type(device_t dev, int *type)
{

	if (type == NULL)
		return (EINVAL);
	*type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY |
	    CPUFREQ_FLAG_UNCACHED;

	return (0);
}

static int
intel_hwpstate_suspend(device_t dev)
{

	return (0);
}

/*
 * Redo a subset of set_autonomous_hwp on resume; untested.  Without this,
 * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus.
 */
static int
intel_hwpstate_resume(device_t dev)
{
	struct hwp_softc *sc;
	struct pcpu *pc;
	int ret;

	sc = device_get_softc(dev);

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
	if (ret) {
		device_printf(dev,
		    "Failed to enable HWP for cpu%d after suspend (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	if (sc->hwp_pkg_ctrl_en)
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
	else
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
	if (ret) {
		device_printf(dev,
		    "Failed to set%s autonomous HWP for cpu%d after suspend\n",
		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
		goto out;
	}
	if (sc->hwp_pkg_ctrl_en) {
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
		if (ret) {
			device_printf(dev,
			    "Failed to set autonomous HWP for package after "
			    "suspend\n");
			goto out;
		}
	}
	if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) {
		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
		    sc->hwp_energy_perf_bias);
		if (ret) {
			device_printf(dev,
			    "Failed to set energy perf bias for cpu%d after "
			    "suspend\n", pc->pc_cpuid);
		}
	}

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);

	return (ret);
}
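
/*
 * Usage sketch (illustrative only, not part of the driver): once attached,
 * the "epp" knob registered in intel_hwpstate_attach() appears under the
 * device's sysctl tree, e.g. dev.hwpstate_intel.0.epp for the first unit
 * (the unit number here is an assumption).  A userland program could read
 * and adjust it via sysctlbyname(3), roughly as follows:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int epp;
 *		size_t len = sizeof(epp);
 *
 *		// Read the current preference: 0 = most performant,
 *		// 100 = most efficient.
 *		if (sysctlbyname("dev.hwpstate_intel.0.epp", &epp, &len,
 *		    NULL, 0) == 0)
 *			printf("current EPP: %d\n", epp);
 *
 *		// Bias toward efficiency (requires root privileges).
 *		epp = 75;
 *		return (sysctlbyname("dev.hwpstate_intel.0.epp", NULL, NULL,
 *		    &epp, sizeof(epp)));
 *	}
 */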