From: Marcus Glocker Subject: qwz: enable association/rx/tx To: tech@openbsd.org Date: Thu, 14 May 2026 17:08:02 +0200 This diff finally enables association and RX/TX traffic for qwz(4), getting me a working WiFi on the Samsung Galaxy Book4 Edge. Tested so far: - Ping out/in. - SSH out/in. - SCP out. - After 2 hours of inactivity, the association remained active and traffic was still flowing. The following is a summary of the changes needed to reach this state. Bug fixes (HAL/WMI drift vs. current ath12k): 1. RX_BE_PADDING0_BYTES 80 -> 8 -- fixes RX-descriptor misalignment (dlpager crash, Hexagon 0x23). 2. Send WMI_VDEV_PARAM_SET_HEMU_MODE = 0 before peer_assoc_cmd -- clears stale HE-MU state. 3. volatile cast on dst-ring hp_addr read in qwz_hal_srng_access_begin() -- matches SRC branch, fixes the compiler hoisting the load on ARM64. 4. BUFFER_ADDR_INFO1: RET_BUF_MGR GENMASK(10,8) -> (11,8), SW_COOKIE (31,11) -> (31,12). 5. wbm2sw_cc_enable = WBM2SW3_EN only -- keeps HW Cookie Convert off the TX rings. 6. HTT_TX_WBM_COMP_INFO0_STATUS (12,9) -> (16,13); drop bogus INFO2_SW_PEER_ID/VALID; add INFO1_REINJECT_REASON, INFO1_EXCEPTION_FRAME, INFO2_ACK_RSSI. 7. REO/TX_RATE_STATS GENMASK shifts (HAL_REO_UPD_RX_QUEUE_INFO2_*, HAL_RX_REO_QUEUE_INFO0/1_* +2 bits; HAL_TX_RATE_STATS_INFO0_* +1 bit) + new HTT_RX_RING_SELECTION_CFG_CMD_INFO0/1. 8. qwz_dp_tx_get_tid() returns HAL_DESC_REO_NON_QOS_TID for non-QoS frames. X1E80100 workaround: - Multi-MSI ext vectors don't deliver: drive qwz_dp_service_srng() for all ext_irq groups from qwz_ce_intr(). Supporting changes to reach the working state: - MHI boot: wait for SBL EE + M0 before BHIE/AMSS in qwz_mhi_fw_load_handler(). - Capture single_chip_mlo_support and qmi_phy_cap_num_phy from QMI phy_cap. - Send WMI_TAG_MLO_{PEER,VDEV}_CREATE_PARAMS TLVs on peer/vdev create. - Peer assoc: trailing EHT TLVs; is_wme_set=1, qos_flag=1, peer_listen_intval=1. - RX TLV offset_valid=1 with packet/header/mpdu offsets for WCN7850. - Propagate AST hash/index from peer_map into arvif. 
- hal_desc_sz returns sizeof(struct hal_rx_desc_wcn7850). - Per-frame encrypt_type (CCMP-128 / TKIP-MIC) in qwz_dp_tx(). - Silent drop filter for stray RX (addr1 not us, 84:e1 prefix). Given that the state on my Samsung seems pretty stable until now, I would ask to enable qwz(4) already for arm64 and amd64 GENERIC. Feedback, further testing, OKs? Index: sys/arch/amd64/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v diff -u -p -u -p -r1.538 GENERIC --- sys/arch/amd64/conf/GENERIC 10 Jan 2026 16:12:36 -0000 1.538 +++ sys/arch/amd64/conf/GENERIC 14 May 2026 14:45:52 -0000 @@ -597,7 +597,7 @@ iwn* at pci? # Intel WiFi Link 4965/5 iwm* at pci? # Intel WiFi Link 7xxx iwx* at pci? # Intel WiFi Link 22xxx qwx* at pci? # Qualcomm 802.11ax -#qwz* at pci? # Qualcomm 802.11be +qwz* at pci? # Qualcomm 802.11be ral* at pci? # Ralink RT2500/RT2501/RT2600 ral* at cardbus? # Ralink RT2500/RT2501/RT2600 rtw* at pci? # Realtek 8180 Index: sys/arch/arm64/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/arch/arm64/conf/GENERIC,v diff -u -p -u -p -r1.314 GENERIC --- sys/arch/arm64/conf/GENERIC 4 May 2026 08:02:42 -0000 1.314 +++ sys/arch/arm64/conf/GENERIC 14 May 2026 14:45:53 -0000 @@ -430,7 +430,7 @@ iwn* at pci? # Intel WiFi Link 4965/5 iwm* at pci? # Intel WiFi Link 7xxx iwx* at pci? # Intel WiFi Link 22xxx qwx* at pci? # Qualcomm 802.11ax -#qwz* at pci? # Qualcomm 802.11be +qwz* at pci? # Qualcomm 802.11be # PCI SCSI ahci* at pci? 
flags 0x0000 # AHCI SATA controllers Index: sys/dev/ic/qwz.c =================================================================== RCS file: /cvs/src/sys/dev/ic/qwz.c,v diff -u -p -u -p -r1.26 qwz.c --- sys/dev/ic/qwz.c 26 Apr 2026 19:25:08 -0000 1.26 +++ sys/dev/ic/qwz.c 14 May 2026 14:45:54 -0000 @@ -1286,21 +1286,7 @@ qwz_hw_wcn7850_dp_rx_h_mpdu_err(struct h uint32_t qwz_hw_wcn7850_get_rx_desc_size(void) { - /* - * Empirically observed on WCN7850 hw2.0 fw 0x110cffff: the FW - * places the MSDU payload at offset 512 of the buffer (with the - * mpdu_start_tag at 216 and mpdu_start data at 224, matching our - * 80-byte rx_padding0). The struct sizeof works out to only 472 - * bytes, so override the descriptor size getter to return the - * actual 512 bytes for m_adj to strip the right amount. - * - * NOTE: keeping struct sizeof at 472 is intentional; bumping - * pkt_hdr_tlv to 168 to make sizeof = 512 caused spontaneous - * machine reboots, suggesting a downstream code path (likely the - * EAPOL TX response) was crashing the FW once real frames started - * arriving. We isolate that here by only changing what m_adj sees. - */ - return 512; + return sizeof(struct hal_rx_desc_wcn7850); } uint8_t @@ -1664,10 +1650,16 @@ ath12k_hal_wcn7850_tcl_to_wbm_rbm_map[DP static const struct ath12k_hw_hal_params ath12k_hw_hal_params_wcn7850 = { .rx_buf_rbm = HAL_RX_BUF_RBM_SW1_BM, - .wbm2sw_cc_enable = HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW0_EN | - HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW2_EN | - HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW3_EN | - HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW4_EN, + /* + * Keep HW cookie-conversion enabled only for the RX release ring + * (WBM2SW3, DP_RX_RELEASE_RING_NUM=3). Linux ath12k handles the + * new HW-CC TX completion layout (hal_wbm_completion_ring_tx with + * buf_va_lo/buf_va_hi), but qwz parses TX completions using the + * older hal_wbm_release_ring layout with buf_addr_info. 
Disabling + * HW CC for the TX rings forces FW to use the SW-cookie path so + * qwz's BUFFER_ADDR_INFO1_SW_COOKIE lookup matches FW writeback. + */ + .wbm2sw_cc_enable = HAL_WBM_SW_COOKIE_CONV_CFG_WBM2SW3_EN, }; const struct hal_rx_ops hal_rx_wcn7850_ops = { @@ -2571,7 +2563,7 @@ static const struct qmi_elem_info qmi_wl { .data_type = QMI_UNSIGNED_2_BYTE, .elem_len = 1, - .elem_size = sizeof(uint8_t), + .elem_size = sizeof(uint16_t), .array_type = NO_ARRAY, .tlv_type = 0x24, .offset = offsetof(struct qmi_wlanfw_host_cap_req_msg_v01, @@ -3875,6 +3867,7 @@ qwz_ce_intr(void *arg) { struct qwz_ce_pipe *pipe = arg; struct qwz_softc *sc = pipe->sc; + int ret; if (!test_bit(ATH12K_FLAG_CE_IRQ_ENABLED, sc->sc_flags) || ((sc->msi_ce_irqmask & (1 << pipe->pipe_num)) == 0)) { @@ -3883,7 +3876,20 @@ qwz_ce_intr(void *arg) return 1; } - return qwz_ce_per_engine_service(sc, pipe->pipe_num); + ret = qwz_ce_per_engine_service(sc, pipe->pipe_num); + + /* + * Multi-MSI ext-IRQ vectors do not deliver on X1E80100; drive + * the DP service path from the CE interrupt instead. + */ + if (test_bit(ATH12K_FLAG_MULTI_MSI_VECTORS, sc->sc_flags) && + test_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, sc->sc_flags)) { + int i; + for (i = 0; i < nitems(sc->ext_irq_grp); i++) + qwz_dp_service_srng(sc, i); + } + + return ret; } int @@ -4717,6 +4723,18 @@ qwz_qmi_recv_wlanfw_phy_cap_req_v1(struc DNPRINTF(QWZ_D_QMI, "%s: resp.single_chip_mlo_support=0x%x\n", __func__, resp.single_chip_mlo_support); + /* + * Capture WCN7850's QMI single_chip_mlo_support bit so that + * qwz_qmi_host_cap_send can echo MLO capability back to FW. + * Linux ath12k core.c notes that "WCN7850 firmware uses QMI + * single_chip_mlo_support bit" specifically for MLO advertisement, + * and qwz never captured/echoed this -- without it, FW takes a + * code path different from Linux for this chip. 
+ */ + if (resp.single_chip_mlo_support_valid && resp.single_chip_mlo_support) + sc->single_chip_mlo_support = 1; + if (resp.num_phy_valid) + sc->qmi_phy_cap_num_phy = resp.num_phy; sc->qmi_resp.result = le16toh(resp.resp.result); sc->qmi_resp.error = le16toh(resp.resp.error); wakeup(&sc->qmi_resp); @@ -5447,6 +5465,7 @@ qwz_qmi_encode_struct(uint8_t *p, size_t { const struct qmi_elem_info *ei = struct_ei->ei_array; size_t remain = input_len; + int nelem, idx; *encoded_len = 0; @@ -5472,39 +5491,50 @@ qwz_qmi_encode_struct(uint8_t *p, size_t } } - if (ei->elem_size > remain) { + /* + * STATIC_ARRAY in a struct member (e.g. hw_link_id[2] in + * wlfw_host_mlo_chip_info_s_v01) means we have to emit + * elem_len consecutive elements; the per-byte/word/etc. + * encoders already use input + offset + idx*elem_size so + * we just call them in a loop and advance p per iteration. + * NO_ARRAY collapses to nelem=1 -- preserves prior behavior. + */ + nelem = (ei->array_type == STATIC_ARRAY) ? ei->elem_len : 1; + if ((size_t)ei->elem_size * nelem > remain) { printf("%s: QMI message buffer too short\n", __func__); return -1; } - switch (ei->data_type) { - case QMI_UNSIGNED_1_BYTE: - if (qwz_qmi_encode_byte(p, ei, input, 0)) - return -1; - break; - case QMI_UNSIGNED_2_BYTE: - if (qwz_qmi_encode_word(p, ei, input, 0)) - return -1; - break; - case QMI_UNSIGNED_4_BYTE: - case QMI_SIGNED_4_BYTE_ENUM: - if (qwz_qmi_encode_dword(p, ei, input, 0)) - return -1; - break; - case QMI_UNSIGNED_8_BYTE: - if (qwz_qmi_encode_qword(p, ei, input, 0)) + for (idx = 0; idx < nelem; idx++) { + switch (ei->data_type) { + case QMI_UNSIGNED_1_BYTE: + if (qwz_qmi_encode_byte(p, ei, input, idx)) + return -1; + break; + case QMI_UNSIGNED_2_BYTE: + if (qwz_qmi_encode_word(p, ei, input, idx)) + return -1; + break; + case QMI_UNSIGNED_4_BYTE: + case QMI_SIGNED_4_BYTE_ENUM: + if (qwz_qmi_encode_dword(p, ei, input, idx)) + return -1; + break; + case QMI_UNSIGNED_8_BYTE: + if (qwz_qmi_encode_qword(p, ei, input, 
idx)) + return -1; + break; + default: + printf("%s: unhandled QMI struct element " + "type %d\n", __func__, ei->data_type); return -1; - break; - default: - printf("%s: unhandled QMI struct element type %d\n", - __func__, ei->data_type); - return -1; + } + if (p != NULL) + p += ei->elem_size; } - remain -= ei->elem_size; - if (p != NULL) - p += ei->elem_size; - *encoded_len += ei->elem_size; + remain -= (size_t)ei->elem_size * nelem; + *encoded_len += (size_t)ei->elem_size * nelem; ei++; } @@ -5615,6 +5645,33 @@ qwz_qmi_encode_msg(uint8_t **encoded_msg goto err; *encoded_len += encoded_string_len; ei++; + } else if (ei->array_type == STATIC_ARRAY) { + /* + * STATIC_ARRAY without a preceding QMI_DATA_LEN + * always emits exactly elem_len entries on the wire, + * regardless of any "valid count" field elsewhere + * in the message. Mirrors Linux's QMI library -- + * required for fields like mlo_chip_info[3] in + * qmi_wlanfw_host_cap_req_msg_v01. + */ + if (ei->data_type == QMI_STRUCT) { + for (i = 0; i < ei->elem_len; i++) { + size_t encoded_struct_len = 0; + size_t inoff = ei->offset + + (i * ei->elem_size); + + if (qwz_qmi_encode_struct(NULL, + &encoded_struct_len, ei, + input + inoff, + input_len - inoff)) + goto err; + *encoded_len += encoded_struct_len; + } + } else { + *encoded_len += (size_t)ei->elem_size * + ei->elem_len; + } + ei++; } else { *encoded_len += ei->elem_size; ei++; @@ -5677,6 +5734,9 @@ qwz_qmi_encode_msg(uint8_t **encoded_msg ei++; if (ei->array_type == VAR_LEN_ARRAY) nelem = datalen; + } else if (ei->array_type == STATIC_ARRAY) { + /* See first-pass STATIC_ARRAY comment above. */ + nelem = ei->elem_len; } for (i = 0; i < nelem; i++) { @@ -5957,6 +6017,25 @@ qwz_qmi_host_cap_send(struct qwz_softc * req.nm_modem |= QWZ_PLATFORM_CAP_PCIE_GLOBAL_RESET; } + /* + * MLO advertisement is intentionally NOT emitted here. 
+ * + * The QMI encoder now handles STATIC_ARRAY correctly so the wire + * format would be valid (FW accepts the request -- verified in + * 2026-05-01 testing with the MLO block enabled), BUT advertising + * MLO capability triggers FW to expect additional MLO-specific + * WMI/HTT initialization that qwz does not currently perform. + * The result is an earlier post-AUTHORIZE FW fault than in the + * non-MLO baseline -- worse, not better. + * + * The PHY_CAP capture (sc->single_chip_mlo_support, + * sc->qmi_phy_cap_num_phy) and the diagnostic printf in + * qwz_qmi_recv_wlanfw_phy_cap_req_v1 stay in place. When MLO + * support is fully ported (post-association MLO link setup, + * MLO peer state machine, etc.), restore the emission block + * here -- the encoder is ready for it. + */ + DNPRINTF(QWZ_D_QMI, "%s: qmi host cap request\n", __func__); ret = qwz_qmi_send_request(sc, QMI_WLANFW_HOST_CAP_REQ_V01, @@ -7141,7 +7220,17 @@ qwz_hal_srng_access_begin(struct qwz_sof srng->u.src_ring.cached_tp = *(volatile uint32_t *)srng->u.src_ring.tp_addr; } else { - srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr; + /* + * Volatile load: hp_addr lives in the rdp DMA-shared + * region the device writes to. Without volatile the + * compiler hoisted the load and we always read a stale + * cached_hp, so TX completion entries the FW wrote + * accumulated indefinitely (4 entries written, only 2 + * drained) until the FW's TX desc pool exhausted and + * QURT asserted. + */ + srng->u.dst_ring.cached_hp = + *(volatile uint32_t *)srng->u.dst_ring.hp_addr; } } @@ -12518,7 +12607,22 @@ qwz_peer_map_event(struct qwz_softc *sc, #ifdef notyet spin_lock_bh(&ab->base_lock); #endif - ni = ieee80211_find_node(ic, mac_addr); + /* + * For STA mode the only peer is the AP, and the per-peer state + * we care about (FW-assigned ast_hash / hw_peer_id) is consumed + * later via ic->ic_bss in qwz_peer_create. 
ieee80211_find_node + * may return a DIFFERENT node from the RB-tree (a stale scan + * entry for the same BSSID), and updating that wrong node + * leaves ic_bss's qwz_peer with ast_hash=0 / hw_peer_id=0 + * forever -- causing the FW to AST-look-up slot 0 on the first + * protected post-AUTHORIZE frame and dlpager-fault. Prefer + * ic_bss whenever the MAC matches. + */ + if (ic->ic_opmode == IEEE80211_M_STA && ic->ic_bss != NULL && + IEEE80211_ADDR_EQ(ic->ic_bss->ni_macaddr, mac_addr)) + ni = ic->ic_bss; + else + ni = ieee80211_find_node(ic, mac_addr); if (ni == NULL) { printf("%s: peer_map: no node for %s\n", sc->sc_dev.dv_xname, ether_sprintf(mac_addr)); @@ -12535,6 +12639,16 @@ qwz_peer_map_event(struct qwz_softc *sc, ether_addr_copy(peer->addr, mac_addr); list_add(&peer->list, &ab->peers); #endif + /* Propagate FW-assigned AST values to STA arvif for qwz_dp_tx(). */ + { + struct qwz_vif *arvif = TAILQ_FIRST(&sc->vif_list); + if (ic->ic_opmode == IEEE80211_M_STA && + arvif != NULL && arvif->vdev_id == vdev_id) { + arvif->ast_hash = ast_hash; + arvif->ast_idx = hw_peer_id; + } + } + sc->peer_mapped = 1; wakeup(&sc->peer_mapped); @@ -13435,6 +13549,9 @@ qwz_dp_tx_htt_rx_filter_setup(struct qwz !!(params.flags & HAL_SRNG_FLAGS_MSI_SWAP)); cmd->info0 |= FIELD_PREP(HTT_RX_RING_SELECTION_CFG_CMD_INFO0_PS, !!(params.flags & HAL_SRNG_FLAGS_DATA_TLV_SWAP)); + cmd->info0 |= FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_CMD_INFO0_OFFSET_VALID, + !!tlv_filter->offset_valid); cmd->info1 = FIELD_PREP(HTT_RX_RING_SELECTION_CFG_CMD_INFO1_BUF_SIZE, rx_buf_size); @@ -13444,6 +13561,33 @@ qwz_dp_tx_htt_rx_filter_setup(struct qwz cmd->pkt_type_en_flags3 = tlv_filter->pkt_filter_flags3; cmd->rx_filter_tlv = tlv_filter->rx_filter; + if (tlv_filter->offset_valid) { + cmd->rx_packet_offset = FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_PACKET_OFFSET, + tlv_filter->rx_packet_offset); + cmd->rx_packet_offset |= FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_HEADER_OFFSET, + 
tlv_filter->rx_header_offset); + + cmd->rx_mpdu_offset = FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_MPDU_END_OFFSET, + tlv_filter->rx_mpdu_end_offset); + cmd->rx_mpdu_offset |= FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_MPDU_START_OFFSET, + tlv_filter->rx_mpdu_start_offset); + + cmd->rx_msdu_offset = FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_MSDU_END_OFFSET, + tlv_filter->rx_msdu_end_offset); + cmd->rx_msdu_offset |= FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_MSDU_START_OFFSET, + tlv_filter->rx_msdu_start_offset); + + cmd->rx_attn_offset = FIELD_PREP( + HTT_RX_RING_SELECTION_CFG_RX_ATTENTION_OFFSET, + tlv_filter->rx_attn_offset); + } + ret = qwz_htc_send(&sc->htc, sc->dp.eid, m); if (ret) goto err_free; @@ -13503,6 +13647,31 @@ qwz_dp_rxdma_ring_sel_config_wcn7850(str tlv_filter.pkt_filter_flags2 = HTT_RX_FP_CTRL_PKT_FILTER_TLV_FLAGS2_BAR; tlv_filter.pkt_filter_flags3 = HTT_RX_FP_DATA_FILTER_FLASG3; + /* + * WCN7850 FW requires explicit RX TLV offsets within our + * hal_rx_desc layout. Without these the FW DMAs the various + * RX TLVs at default offsets that don't match our struct + * layout, internal FW state corrupts as packets flow, and the + * dlpager eventually faults at a stale function pointer. + * Mirror of Linux ath12k_dp_rxdma_ring_sel_config_wcn7850. + */ + tlv_filter.offset_valid = 1; + /* + * rx_packet_offset uses sc->hal.hal_desc_sz (=512 on WCN7850), + * NOT sizeof(struct hal_rx_desc) (=472). WCN7850 FW empirically + * places the packet at offset 512 even though our struct is only + * 472; using sizeof here would tell the FW to DMA the packet 40 + * bytes earlier than our driver reads it from, and EAPOL never + * reaches net80211 -> 4-way handshake never starts. 
+ */ + tlv_filter.rx_packet_offset = sc->hal.hal_desc_sz; + tlv_filter.rx_header_offset = + offsetof(struct hal_rx_desc_wcn7850, pkt_hdr_tlv); + tlv_filter.rx_mpdu_start_offset = + offsetof(struct hal_rx_desc_wcn7850, mpdu_start_tag); + tlv_filter.rx_msdu_end_offset = + offsetof(struct hal_rx_desc_wcn7850, msdu_end_tag); + for (i = 0; i < sc->hw_params.num_rxmda_per_pdev; i++) { ring_id = dp->rx_mac_buf_ring[i].ring_id; ret = qwz_dp_tx_htt_rx_filter_setup(sc, ring_id, i, @@ -13863,14 +14032,9 @@ qwz_dp_tx_process_htt_tx_complete(struct case HAL_WBM_REL_HTT_TX_COMP_STATUS_TTL: ts.acked = (wbm_status == HAL_WBM_REL_HTT_TX_COMP_STATUS_OK); ts.msdu_id = msdu_id; - ts.ack_rssi = FIELD_GET(HTT_TX_WBM_COMP_INFO1_ACK_RSSI, - status_desc->info1); - - if (FIELD_GET(HTT_TX_WBM_COMP_INFO2_VALID, status_desc->info2)) - ts.peer_id = FIELD_GET(HTT_TX_WBM_COMP_INFO2_SW_PEER_ID, - status_desc->info2); - else - ts.peer_id = HTT_INVALID_PEER_ID; + ts.ack_rssi = FIELD_GET(HTT_TX_WBM_COMP_INFO2_ACK_RSSI, + status_desc->info2); + ts.peer_id = HTT_INVALID_PEER_ID; qwz_dp_tx_htt_tx_complete_buf(sc, tx_ring, &ts); break; @@ -15009,6 +15173,7 @@ qwz_dp_rx_process_msdu(struct qwz_softc uint16_t msdu_len; int ret; uint32_t hal_rx_desc_sz = sc->hal.hal_desc_sz; + struct rx_mpdu_start_qcn9274 *mpdu; last_buf = qwz_dp_rx_get_msdu_last_buf(msdu_list, msdu); if (!last_buf) { @@ -15031,13 +15196,11 @@ qwz_dp_rx_process_msdu(struct qwz_softc /* * WCN7850 FW injects internal messages into the REO ring with - * fc_valid=1 but garbage 802.11 contents. Their synthetic addr1 - * always ends in 84:e1 (regardless of the multicast bit). Drop - * those, then drop any remaining unicast frames not addressed + * fc_valid=1 but garbage 802.11 contents; their synthetic addr1 + * ends in 84:e1. Drop those and any unicast frames not addressed * to our own MAC. 
*/ - struct rx_mpdu_start_qcn9274 *mpdu = &rx_desc->u.wcn7850.mpdu_start; - + mpdu = &rx_desc->u.wcn7850.mpdu_start; if (mpdu->addr1[4] == 0x84 && mpdu->addr1[5] == 0xe1) return EIO; if (!(mpdu->addr1[0] & 0x01) && @@ -16807,15 +16970,28 @@ qwz_wmi_send_peer_create_cmd(struct qwz_ { struct qwz_pdev_wmi *wmi = &sc->wmi.wmi[pdev_id]; struct wmi_peer_create_cmd *cmd; + struct wmi_peer_create_mlo_params *ml_param; + struct wmi_tlv *tlv; struct mbuf *m; - int ret; + void *ptr; + int ret, len; - m = qwz_wmi_alloc_mbuf(sizeof(*cmd)); + /* + * The FW expects a trailing WMI_TAG_ARRAY_STRUCT containing a + * WMI_TAG_MLO_PEER_CREATE_PARAMS struct after the cmd, even when + * MLO is unused. Without it the FW reads past our buffer when + * walking the TLV stream, stores garbage MLO flags in per-peer + * state, and crashes later when it consumes that state. + */ + len = sizeof(*cmd) + TLV_HDR_SIZE + sizeof(*ml_param); + + m = qwz_wmi_alloc_mbuf(len); if (!m) return ENOMEM; - cmd = (struct wmi_peer_create_cmd *)(mtod(m, uint8_t *) + - sizeof(struct ath12k_htc_hdr) + sizeof(struct wmi_cmd_hdr)); + ptr = (void *)(mtod(m, uint8_t *) + sizeof(struct ath12k_htc_hdr) + + sizeof(struct wmi_cmd_hdr)); + cmd = ptr; cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_PEER_CREATE_CMD) | FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE); @@ -16823,6 +16999,17 @@ qwz_wmi_send_peer_create_cmd(struct qwz_ cmd->peer_type = param->peer_type; cmd->vdev_id = param->vdev_id; + ptr = (uint8_t *)ptr + sizeof(*cmd); + tlv = ptr; + tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_STRUCT) | + FIELD_PREP(WMI_TLV_LEN, sizeof(*ml_param)); + ptr = (uint8_t *)ptr + TLV_HDR_SIZE; + ml_param = ptr; + ml_param->tlv_header = + FIELD_PREP(WMI_TLV_TAG, WMI_TAG_MLO_PEER_CREATE_PARAMS) | + FIELD_PREP(WMI_TLV_LEN, sizeof(*ml_param) - TLV_HDR_SIZE); + /* flags=0: MLO disabled (mbuf is zero-initialized). 
*/ + ret = qwz_wmi_cmd_send(wmi, m, WMI_PEER_CREATE_CMDID); if (ret) { if (ret != ESHUTDOWN) { @@ -17042,7 +17229,18 @@ qwz_wmi_send_peer_assoc_cmd(struct qwz_s TLV_HDR_SIZE + (peer_legacy_rates_align * sizeof(uint8_t)) + TLV_HDR_SIZE + (peer_ht_rates_align * sizeof(uint8_t)) + sizeof(*mcs) + TLV_HDR_SIZE + - (sizeof(*he_mcs) * param->peer_he_mcs_count); + (sizeof(*he_mcs) * param->peer_he_mcs_count) + + /* + * The FW expects three trailing TLVs (in this order) after + * the HE rate set, even when the corresponding features are + * unused. Without these placeholder TLV headers the FW reads + * past the end of our buffer when walking the TLV stream and + * crashes inside dlpager. Mirror of Linux ath12k. + * 1. WMI_TAG_ARRAY_STRUCT (ML params) + * 2. WMI_TAG_ARRAY_STRUCT (EHT rate set) + * 3. WMI_TAG_ARRAY_STRUCT (ML partner info) + */ + 3 * TLV_HDR_SIZE; m = qwz_wmi_alloc_mbuf(len); if (!m) @@ -17156,6 +17354,28 @@ qwz_wmi_send_peer_assoc_cmd(struct qwz_s ptr += sizeof(*he_mcs); } + /* + * Three trailing placeholder TLVs the FW always expects after the + * HE rate set: ML params, EHT rate set, ML partner info. All + * empty (length 0) since we don't support MLO or WiFi7 EHT rates. + * Without these the FW walks past our buffer end and crashes in + * dlpager. Mirror of Linux ath12k_wmi_send_peer_assoc_cmd(). 
+ */ + tlv = ptr; + tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_STRUCT) | + FIELD_PREP(WMI_TLV_LEN, 0); /* ML params */ + ptr += TLV_HDR_SIZE; + + tlv = ptr; + tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_STRUCT) | + FIELD_PREP(WMI_TLV_LEN, 0); /* EHT rate set */ + ptr += TLV_HDR_SIZE; + + tlv = ptr; + tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_STRUCT) | + FIELD_PREP(WMI_TLV_LEN, 0); /* ML partner info */ + ptr += TLV_HDR_SIZE; + ret = qwz_wmi_cmd_send(wmi, m, WMI_PEER_ASSOC_CMDID); if (ret) { if (ret != ESHUTDOWN) { @@ -17166,21 +17386,6 @@ qwz_wmi_send_peer_assoc_cmd(struct qwz_s return ret; } - DNPRINTF(QWZ_D_WMI, "%s: cmd peer assoc vdev id %d assoc id %d " - "peer mac %s peer_flags %x rate_caps %x peer_caps %x " - "listen_intval %d ht_caps %x max_mpdu %d nss %d phymode %d " - "peer_mpdu_density %d vht_caps %x he cap_info %x he ops %x " - "he cap_info_ext %x he phy %x %x %x peer_bw_rxnss_override %x\n", - __func__, cmd->vdev_id, cmd->peer_associd, - ether_sprintf(param->peer_mac), - cmd->peer_flags, cmd->peer_rate_caps, cmd->peer_caps, - cmd->peer_listen_intval, cmd->peer_ht_caps, - cmd->peer_max_mpdu, cmd->peer_nss, cmd->peer_phymode, - cmd->peer_mpdu_density, cmd->peer_vht_caps, cmd->peer_he_cap_info, - cmd->peer_he_ops, cmd->peer_he_cap_info_ext, - cmd->peer_he_cap_phy[0], cmd->peer_he_cap_phy[1], - cmd->peer_he_cap_phy[2], cmd->peer_bw_rxnss_override); - return 0; } @@ -17523,7 +17728,7 @@ qwz_wmi_mgmt_send(struct qwz_softc *sc, struct wmi_mgmt_send_cmd *cmd; struct wmi_tlv *frame_tlv; struct mbuf *m; - uint32_t buf_len; + uint32_t buf_len, buf_len_aligned; int ret, len; uint64_t paddr; @@ -17532,7 +17737,9 @@ qwz_wmi_mgmt_send(struct qwz_softc *sc, buf_len = frame->m_pkthdr.len < WMI_MGMT_SEND_DOWNLD_LEN ? 
frame->m_pkthdr.len : WMI_MGMT_SEND_DOWNLD_LEN; - len = sizeof(*cmd) + sizeof(*frame_tlv) + roundup(buf_len, 4); + buf_len_aligned = roundup(buf_len, sizeof(uint32_t)); + + len = sizeof(*cmd) + sizeof(*frame_tlv) + buf_len_aligned; m = qwz_wmi_alloc_mbuf(len); if (!m) @@ -17555,12 +17762,13 @@ qwz_wmi_mgmt_send(struct qwz_softc *sc, sizeof(struct ath12k_htc_hdr) + sizeof(struct wmi_cmd_hdr) + sizeof(*cmd)); frame_tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) | - FIELD_PREP(WMI_TLV_LEN, buf_len); + FIELD_PREP(WMI_TLV_LEN, buf_len_aligned); memcpy(frame_tlv->value, mtod(frame, void *), buf_len); #if 0 /* Not needed on OpenBSD? */ ath12k_ce_byte_swap(frame_tlv->value, buf_len); #endif + ret = qwz_wmi_cmd_send(wmi, m, WMI_MGMT_TX_SEND_CMDID); if (ret) { if (ret != ESHUTDOWN) { @@ -21816,22 +22024,21 @@ qwz_peer_create(struct qwz_softc *sc, st spin_lock_bh(&ar->ab->base_lock); #endif peer = &nq->peer; - if (peer) { - if (peer->peer_id != HAL_INVALID_PEERID && - peer->vdev_id == param->vdev_id) { -#ifdef notyet - spin_unlock_bh(&ar->ab->base_lock); - mutex_unlock(&ar->ab->tbl_mtx_lock); -#endif - return EINVAL; - } -#if 0 - /* Assume sta is transitioning to another band. - * Remove here the peer from rhash. - */ - ath12k_peer_rhash_delete(ar->ab, peer); -#endif - } + /* + * Reset stale peer state from any prior attempt. After a + * fatal_firmware_error the FW peer table is wiped but the + * host-side qwz_node persists with peer->peer_id and + * peer->vdev_id from the last attempt. Without this reset + * the subsequent peer_create returns EINVAL and we get stuck + * in a recovery loop (peer_create fail -> wlan mode off fail + * -> mhi_start -> repeat). The stale ast_hash / hw_peer_id + * are also reset because they will be re-populated by the + * next peer_map_event. 
+ */ + peer->peer_id = HAL_INVALID_PEERID; + peer->vdev_id = 0; + peer->ast_hash = 0; + peer->hw_peer_id = 0; #ifdef notyet spin_unlock_bh(&ar->ab->base_lock); mutex_unlock(&ar->ab->tbl_mtx_lock); @@ -22619,10 +22826,20 @@ uint8_t qwz_dp_tx_get_tid(struct mbuf *m) { struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *); - uint16_t qos = ieee80211_get_qos(wh); - uint8_t tid = qos & IEEE80211_QOS_TID; - return tid; + /* + * Mirror of Linux ath12k_dp_tx_get_tid: non-QoS data frames go + * onto the special HAL_DESC_REO_NON_QOS_TID (=16), not TID 0. + * qwz used to return 0 for every frame, which made the FW believe + * the non-QoS DHCP/ARP frames belonged in TID 0 (AC_BE) queue; + * the resulting mismatch (TCL desc says TID 0, frame has no QoS + * Control field) tripped a QURT internal-state assertion after + * a handful of such frames post-AUTHORIZE. + */ + if (!ieee80211_has_qos(wh)) + return HAL_DESC_REO_NON_QOS_TID; + + return ieee80211_get_qos(wh) & IEEE80211_QOS_TID; } /* @@ -22743,8 +22960,21 @@ qwz_dp_tx(struct qwz_softc *sc, struct q ti.meta_data_flags = arvif->tcl_metadata; - if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) && - ti.encap_type == HAL_TCL_ENCAP_TYPE_RAW) { + /* + * Set the per-frame encrypt_type so the FW knows which cipher + * to apply. This must run for ALL encap types, not only RAW: + * leaving encrypt_type at HAL_ENCRYPT_TYPE_OPEN for protected + * data frames in NATIVE_WIFI encap caused the FW to crash on + * the first post-AUTHORIZE data TX (e.g. DHCP DISCOVER) because + * the FW saw a frame with FC1_PROTECTED=1 but a TX descriptor + * saying "no encryption" -- inconsistent state -> dlpager fault. + * + * For RAW encap with HW crypto, we additionally need to make + * room in the mbuf for the cipher MIC (FW writes it in place). + * For NATIVE_WIFI encap the FW does the full encrypt path on + * its side, so no host-side space reservation is required. 
+ */ + if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_get_txkey(ic, wh, ni); if (test_bit(ATH12K_FLAG_HW_CRYPTO_DISABLED, sc->sc_flags)) { ti.encrypt_type = HAL_ENCRYPT_TYPE_OPEN; @@ -22752,16 +22982,18 @@ qwz_dp_tx(struct qwz_softc *sc, struct q switch (k->k_cipher) { case IEEE80211_CIPHER_CCMP: ti.encrypt_type = HAL_ENCRYPT_TYPE_CCMP_128; - if (m_makespace(m, m->m_pkthdr.len, - IEEE80211_CCMP_MICLEN, &off) == NULL) { + if (ti.encap_type == HAL_TCL_ENCAP_TYPE_RAW && + m_makespace(m, m->m_pkthdr.len, + IEEE80211_CCMP_MICLEN, &off) == NULL) { m_freem(m); return ENOSPC; } break; case IEEE80211_CIPHER_TKIP: ti.encrypt_type = HAL_ENCRYPT_TYPE_TKIP_MIC; - if (m_makespace(m, m->m_pkthdr.len, - IEEE80211_TKIP_MICLEN, &off) == NULL) { + if (ti.encap_type == HAL_TCL_ENCAP_TYPE_RAW && + m_makespace(m, m->m_pkthdr.len, + IEEE80211_TKIP_MICLEN, &off) == NULL) { m_freem(m); return ENOSPC; } @@ -22772,7 +23004,8 @@ qwz_dp_tx(struct qwz_softc *sc, struct q } } - if (ti.encrypt_type == HAL_ENCRYPT_TYPE_OPEN) { + if (ti.encrypt_type == HAL_ENCRYPT_TYPE_OPEN && + ti.encap_type == HAL_TCL_ENCAP_TYPE_RAW) { /* Using software crypto. */ if ((m = ieee80211_encrypt(ic, m, k)) == NULL) return ENOBUFS; @@ -23573,11 +23806,14 @@ qwz_auth(struct qwz_softc *sc) qwz_recalculate_mgmt_rate(sc, ni, arvif->vdev_id, pdev->pdev_id); ni->ni_txrate = 0; - ret = qwz_mac_station_add(sc, arvif, pdev->pdev_id, ni); - if (ret) - return ret; - - /* Start vdev. */ + /* + * Start vdev BEFORE creating the peer. Linux ath12k starts the + * vdev at chanctx-assignment time, well before any PEER_CREATE. + * If we instead PEER_CREATE first, the FW creates the peer entry + * against an unstarted vdev: the entry is half-initialized (no + * channel binding) and AUTHORIZE later dispatches through a stale + * function pointer in dlpager and crashes. 
+ */ ret = qwz_mac_vdev_start(sc, arvif, pdev->pdev_id); if (ret) { printf("%s: failed to start MAC for VDEV: %d\n", @@ -23591,6 +23827,10 @@ qwz_auth(struct qwz_softc *sc) */ qwz_recalculate_mgmt_rate(sc, ni, arvif->vdev_id, pdev->pdev_id); + ret = qwz_mac_station_add(sc, arvif, pdev->pdev_id, ni); + if (ret) + return ret; + return ret; } @@ -23640,9 +23880,28 @@ qwz_peer_assoc_h_basic(struct qwz_softc arg->vdev_id = arvif->vdev_id; arg->peer_associd = IEEE80211_AID(ni->ni_associd); arg->auth_flag = 1; - arg->peer_listen_intval = ni->ni_intval; + /* + * peer_listen_intval is the STA wake interval in BEACONS, not in + * TUs. Linux ath12k passes hw->conf.listen_interval which is 1 + * (wake every beacon). ni->ni_intval would be the beacon + * interval in TUs (~100); using that here causes the FW to set + * up power-save logic with an unrealistic interval, which is + * then dispatched after AUTHORIZE and crashes dlpager. + */ + arg->peer_listen_intval = 1; arg->peer_nss = 1; arg->peer_caps = ni->ni_capinfo; + + /* + * Modern WCN7850 FW expects WMI_PEER_QOS for any STA peer. + * Without it the FW peer state is internally inconsistent and + * AUTHORIZE later dispatches into a stale function pointer, + * crashing dlpager. Linux ath12k sets these in + * ath12k_peer_assoc_h_qos based on sta->wme; we don't have an + * easy net80211 equivalent so set them unconditionally for STA. + */ + arg->is_wme_set = 1; + arg->qos_flag = 1; } void @@ -23745,6 +24004,7 @@ qwz_run(struct qwz_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni = ic->ic_bss; + struct qwz_node *nq = (struct qwz_node *)ni; struct qwz_vif *arvif = TAILQ_FIRST(&sc->vif_list); /* XXX */ uint8_t pdev_id = 0; /* TODO: derive pdev ID somehow? 
*/ struct peer_assoc_params peer_arg; @@ -23756,7 +24016,40 @@ qwz_run(struct qwz_softc *sc) DNPRINTF(QWZ_D_MAC, "%s: vdev %i assoc bssid %pM aid %d\n", __func__, arvif->vdev_id, arvif->bssid, arvif->aid); + /* + * Clear stale per-node key-install flags before this association + * attempt. qwz_run_stop normally resets them on RUN -> lower + * transitions, but a fatal_firmware_error mid-association doesn't + * always tear down through RUN, so flags survive into the next + * cycle. When that happens, qwz_add_sta_key's "(flags & + * want_keymask) == want_keymask" check passes after the very + * first INSTALL_KEY (because the OTHER bit was carried over), + * AUTHORIZE fires mid-key-install, and the FW ends up authorizing + * an only-half-keyed peer. The first protected data TX after + * that crashes dlpager. + */ + nq->flags &= ~(QWZ_NODE_FLAG_HAVE_PAIRWISE_KEY | + QWZ_NODE_FLAG_HAVE_GROUP_KEY); + qwz_peer_assoc_prepare(sc, arvif, ni, &peer_arg, 0); + + /* + * Tell the FW the per-vdev HE MU mode before peer_assoc. Linux + * ath12k_bss_assoc explicitly comments "keep this before + * ath12k_wmi_send_peer_assoc_cmd()": the FW configures HE MU + * structures from this value at peer_assoc time, and we don't + * want a stale boot-default left over from earlier configuration. + * For our non-HE peer the computed hemode is always 0 (Linux's + * ath12k_mac_vif_recalc_sta_he_txbf early-returns when + * !he_support, leaving the local "hemode = 0" intact). 
+ */ + ret = qwz_wmi_vdev_set_param_cmd(sc, arvif->vdev_id, pdev_id, + WMI_VDEV_PARAM_SET_HEMU_MODE, 0); + if (ret) { + printf("%s: failed to submit vdev param SET_HEMU_MODE 0: %d\n", + sc->sc_dev.dv_xname, ret); + return ret; + } peer_arg.is_assoc = 1; Index: sys/dev/ic/qwzreg.h =================================================================== RCS file: /cvs/src/sys/dev/ic/qwzreg.h,v diff -u -p -u -p -r1.13 qwzreg.h --- sys/dev/ic/qwzreg.h 26 Apr 2026 19:25:08 -0000 1.13 +++ sys/dev/ic/qwzreg.h 14 May 2026 14:45:55 -0000 @@ -1952,6 +1952,7 @@ enum wmi_tlv_tag { WMI_TAG_PDEV_NON_SRG_OBSS_BSSID_ENABLE_BITMAP_CMD, WMI_TAG_REGULATORY_RULE_EXT_STRUCT = 0x3A9, WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT, + WMI_TAG_MLO_PEER_CREATE_PARAMS = 0x3D5, WMI_TAG_PDEV_SET_BIOS_SAR_TABLE_CMD = 0x3D8, WMI_TAG_PDEV_SET_BIOS_GEO_TABLE_CMD, WMI_TAG_MAX @@ -2766,6 +2767,13 @@ struct wmi_vdev_create_cmd { uint32_t pdev_id; uint32_t mbssid_flags; uint32_t mbssid_tx_vdev_id; + /* + * Trailing fields the FW expects. Mirror of Linux + * struct wmi_vdev_create_cmd. Without them the FW reads + * garbage past our struct and may crash. + */ + uint32_t vdev_stats_id_valid; + uint32_t vdev_stats_id; } __packed; struct wmi_vdev_txrx_streams { @@ -2832,6 +2840,13 @@ struct wmi_vdev_start_request_cmd { uint32_t min_data_rate; uint32_t mbssid_flags; uint32_t mbssid_tx_vdev_id; + /* + * Trailing fields the FW expects. Mirror of Linux + * struct wmi_vdev_start_request_cmd. Without them the FW + * reads garbage past our struct and may crash. + */ + uint32_t eht_ops; + uint32_t punct_bitmap; } __packed; #define MGMT_TX_DL_FRM_LEN 64 @@ -3140,6 +3155,11 @@ struct wmi_peer_create_cmd { uint32_t peer_type; } __packed; +struct wmi_peer_create_mlo_params { + uint32_t tlv_header; + uint32_t flags; +} __packed; + struct wmi_peer_delete_cmd { uint32_t tlv_header; uint32_t vdev_id; @@ -3873,6 +3893,13 @@ struct peer_assoc_params { struct ath12k_ppe_threshold peer_ppet; }; +/* + * EHT (WiFi 7) capability array sizes. 
We don't use EHT but the FW + * still expects these fields in the wmi_peer_assoc_complete_cmd struct. + */ +#define WMI_MAX_EHTCAP_MAC_SIZE 2 +#define WMI_MAX_EHTCAP_PHY_SIZE 3 + struct wmi_peer_assoc_complete_cmd { uint32_t tlv_header; struct wmi_mac_addr peer_macaddr; @@ -3902,6 +3929,25 @@ struct wmi_peer_assoc_complete_cmd { uint32_t peer_he_cap_info_internal; uint32_t min_data_rate; uint32_t peer_he_caps_6ghz; + /* + * Trailing fields the FW expects. Zero-initialised via the + * mbuf memset in qwz_htc_alloc_mbuf; we never set them because + * we don't support MLO (multi-link), WiFi 7 EHT, or per-peer + * auth-mode override. Without these fields the FW reads + * garbage past our struct (interpreting the legacy-rates TLV + * header and following bytes as struct fields) and crashes + * inside dlpager. Mirror of Linux struct + * wmi_peer_assoc_complete_cmd. + */ + uint32_t sta_type; + uint32_t bss_max_idle_option; + uint32_t auth_mode; + uint32_t peer_flags_ext; + uint32_t punct_bitmap; + uint32_t peer_eht_cap_mac[WMI_MAX_EHTCAP_MAC_SIZE]; + uint32_t peer_eht_cap_phy[WMI_MAX_EHTCAP_PHY_SIZE]; + uint32_t peer_eht_ops; + struct wmi_ppe_threshold peer_eht_ppet; } __packed; struct wmi_stop_scan_cmd { @@ -7651,8 +7697,8 @@ enum hal_reo_cmd_status { #define BUFFER_ADDR_INFO0_ADDR GENMASK(31, 0) #define BUFFER_ADDR_INFO1_ADDR GENMASK(7, 0) -#define BUFFER_ADDR_INFO1_RET_BUF_MGR GENMASK(10, 8) -#define BUFFER_ADDR_INFO1_SW_COOKIE GENMASK(31, 11) +#define BUFFER_ADDR_INFO1_RET_BUF_MGR GENMASK(11, 8) +#define BUFFER_ADDR_INFO1_SW_COOKIE GENMASK(31, 12) struct ath12k_buffer_addr { uint32_t info0; @@ -9438,14 +9484,14 @@ struct hal_ce_srng_dst_status_desc { */ #define HAL_TX_RATE_STATS_INFO0_VALID BIT(0) -#define HAL_TX_RATE_STATS_INFO0_BW GENMASK(2, 1) -#define HAL_TX_RATE_STATS_INFO0_PKT_TYPE GENMASK(6, 3) -#define HAL_TX_RATE_STATS_INFO0_STBC BIT(7) -#define HAL_TX_RATE_STATS_INFO0_LDPC BIT(8) -#define HAL_TX_RATE_STATS_INFO0_SGI GENMASK(10, 9) -#define 
HAL_TX_RATE_STATS_INFO0_MCS GENMASK(14, 11) -#define HAL_TX_RATE_STATS_INFO0_OFDMA_TX BIT(15) -#define HAL_TX_RATE_STATS_INFO0_TONES_IN_RU GENMASK(27, 16) +#define HAL_TX_RATE_STATS_INFO0_BW GENMASK(3, 1) +#define HAL_TX_RATE_STATS_INFO0_PKT_TYPE GENMASK(7, 4) +#define HAL_TX_RATE_STATS_INFO0_STBC BIT(8) +#define HAL_TX_RATE_STATS_INFO0_LDPC BIT(9) +#define HAL_TX_RATE_STATS_INFO0_SGI GENMASK(11, 10) +#define HAL_TX_RATE_STATS_INFO0_MCS GENMASK(15, 12) +#define HAL_TX_RATE_STATS_INFO0_OFDMA_TX BIT(16) +#define HAL_TX_RATE_STATS_INFO0_TONES_IN_RU GENMASK(28, 17) enum hal_tx_rate_stats_bw { HAL_TX_RATE_STATS_BW_20, @@ -9862,19 +9908,19 @@ enum hal_rx_reo_queue_pn_size { #define HAL_RX_REO_QUEUE_INFO0_RETRY BIT(8) #define HAL_RX_REO_QUEUE_INFO0_CHECK_2K_MODE BIT(9) #define HAL_RX_REO_QUEUE_INFO0_OOR_MODE BIT(10) -#define HAL_RX_REO_QUEUE_INFO0_BA_WINDOW_SIZE GENMASK(18, 11) -#define HAL_RX_REO_QUEUE_INFO0_PN_CHECK BIT(19) -#define HAL_RX_REO_QUEUE_INFO0_EVEN_PN BIT(20) -#define HAL_RX_REO_QUEUE_INFO0_UNEVEN_PN BIT(21) -#define HAL_RX_REO_QUEUE_INFO0_PN_HANDLE_ENABLE BIT(22) -#define HAL_RX_REO_QUEUE_INFO0_PN_SIZE GENMASK(24, 23) -#define HAL_RX_REO_QUEUE_INFO0_IGNORE_AMPDU_FLG BIT(25) +#define HAL_RX_REO_QUEUE_INFO0_BA_WINDOW_SIZE GENMASK(20, 11) +#define HAL_RX_REO_QUEUE_INFO0_PN_CHECK BIT(21) +#define HAL_RX_REO_QUEUE_INFO0_EVEN_PN BIT(22) +#define HAL_RX_REO_QUEUE_INFO0_UNEVEN_PN BIT(23) +#define HAL_RX_REO_QUEUE_INFO0_PN_HANDLE_ENABLE BIT(24) +#define HAL_RX_REO_QUEUE_INFO0_PN_SIZE GENMASK(26, 25) +#define HAL_RX_REO_QUEUE_INFO0_IGNORE_AMPDU_FLG BIT(27) #define HAL_RX_REO_QUEUE_INFO1_SVLD BIT(0) #define HAL_RX_REO_QUEUE_INFO1_SSN GENMASK(12, 1) -#define HAL_RX_REO_QUEUE_INFO1_CURRENT_IDX GENMASK(20, 13) -#define HAL_RX_REO_QUEUE_INFO1_SEQ_2K_ERR BIT(21) -#define HAL_RX_REO_QUEUE_INFO1_PN_ERR BIT(22) +#define HAL_RX_REO_QUEUE_INFO1_CURRENT_IDX GENMASK(22, 13) +#define HAL_RX_REO_QUEUE_INFO1_SEQ_2K_ERR BIT(23) +#define HAL_RX_REO_QUEUE_INFO1_PN_ERR BIT(24) #define 
HAL_RX_REO_QUEUE_INFO1_PN_VALID BIT(31) #define HAL_RX_REO_QUEUE_INFO2_MPDU_COUNT GENMASK(6, 0) @@ -10020,13 +10066,13 @@ struct hal_rx_reo_queue { #define HAL_REO_UPD_RX_QUEUE_INFO1_PN_HANDLE_ENABLE BIT(30) #define HAL_REO_UPD_RX_QUEUE_INFO1_IGNORE_AMPDU_FLG BIT(31) -#define HAL_REO_UPD_RX_QUEUE_INFO2_BA_WINDOW_SIZE GENMASK(7, 0) -#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_SIZE GENMASK(9, 8) -#define HAL_REO_UPD_RX_QUEUE_INFO2_SVLD BIT(10) -#define HAL_REO_UPD_RX_QUEUE_INFO2_SSN GENMASK(22, 11) -#define HAL_REO_UPD_RX_QUEUE_INFO2_SEQ_2K_ERR BIT(23) -#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_ERR BIT(24) -#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_VALID BIT(25) +#define HAL_REO_UPD_RX_QUEUE_INFO2_BA_WINDOW_SIZE GENMASK(9, 0) +#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_SIZE GENMASK(11, 10) +#define HAL_REO_UPD_RX_QUEUE_INFO2_SVLD BIT(12) +#define HAL_REO_UPD_RX_QUEUE_INFO2_SSN GENMASK(24, 13) +#define HAL_REO_UPD_RX_QUEUE_INFO2_SEQ_2K_ERR BIT(25) +#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_ERR BIT(26) +#define HAL_REO_UPD_RX_QUEUE_INFO2_PN_VALID BIT(27) struct hal_reo_update_rx_queue { struct hal_reo_cmd_hdr cmd; @@ -12396,7 +12442,7 @@ struct hal_rx_desc_qcn9274_compact { * of upstream Linux/ath12k headers would suggest). The padding between - * msdu_end and mpdu_start_tag is 80 bytes, not 8. + * msdu_end and mpdu_start_tag is 8 bytes, matching current ath12k. 
*/ -#define RX_BE_PADDING0_BYTES 80 +#define RX_BE_PADDING0_BYTES 8 #define RX_BE_PADDING1_BYTES 8 #define HAL_RX_BE_PKT_HDR_TLV_LEN 112 @@ -12454,14 +12500,12 @@ struct hal_rx_desc { #define HTT_INVALID_PEER_ID 0xffff -/* HTT tx completion is overlaid in wbm_release_ring */ -#define HTT_TX_WBM_COMP_INFO0_STATUS GENMASK(12, 9) -#define HTT_TX_WBM_COMP_INFO0_REINJECT_REASON GENMASK(16, 13) -#define HTT_TX_WBM_COMP_INFO0_REINJECT_REASON GENMASK(16, 13) - -#define HTT_TX_WBM_COMP_INFO1_ACK_RSSI GENMASK(31, 24) -#define HTT_TX_WBM_COMP_INFO2_SW_PEER_ID GENMASK(15, 0) -#define HTT_TX_WBM_COMP_INFO2_VALID BIT(21) +/* HTT tx completion is overlaid in wbm_release_ring (ath12k wifi7) */ +#define HTT_TX_WBM_COMP_INFO0_STATUS GENMASK(16, 13) +#define HTT_TX_WBM_COMP_INFO1_REINJECT_REASON GENMASK(3, 0) +#define HTT_TX_WBM_COMP_INFO1_EXCEPTION_FRAME BIT(4) + +#define HTT_TX_WBM_COMP_INFO2_ACK_RSSI GENMASK(31, 24) struct htt_tx_wbm_completion { uint32_t info0; @@ -12819,8 +12863,30 @@ enum htt_ppdu_stats_tag_type { #define HTT_RX_RING_SELECTION_CFG_CMD_INFO0_RING_ID GENMASK(23, 16) #define HTT_RX_RING_SELECTION_CFG_CMD_INFO0_SS BIT(24) #define HTT_RX_RING_SELECTION_CFG_CMD_INFO0_PS BIT(25) - -#define HTT_RX_RING_SELECTION_CFG_CMD_INFO1_BUF_SIZE GENMASK(15, 0) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO0_OFFSET_VALID BIT(26) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO0_DROP_THRES_VAL BIT(27) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO0_EN_RXMON BIT(28) + +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO1_BUF_SIZE GENMASK(15, 0) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO1_CONF_LEN_MGMT GENMASK(18, 16) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO1_CONF_LEN_CTRL GENMASK(21, 19) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO1_CONF_LEN_DATA GENMASK(24, 22) + +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO2_DROP_THRESHOLD GENMASK(9, 0) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO2_EN_LOG_MGMT_TYPE BIT(17) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO2_EN_CTRL_TYPE BIT(18) +#define 
HTT_RX_RING_SELECTION_CFG_CMD_INFO2_EN_LOG_DATA_TYPE BIT(19) + +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO3_EN_TLV_PKT_OFFSET BIT(0) +#define HTT_RX_RING_SELECTION_CFG_CMD_INFO3_PKT_TLV_OFFSET GENMASK(14, 1) + +#define HTT_RX_RING_SELECTION_CFG_RX_PACKET_OFFSET GENMASK(15, 0) +#define HTT_RX_RING_SELECTION_CFG_RX_HEADER_OFFSET GENMASK(31, 16) +#define HTT_RX_RING_SELECTION_CFG_RX_MPDU_END_OFFSET GENMASK(15, 0) +#define HTT_RX_RING_SELECTION_CFG_RX_MPDU_START_OFFSET GENMASK(31, 16) +#define HTT_RX_RING_SELECTION_CFG_RX_MSDU_END_OFFSET GENMASK(15, 0) +#define HTT_RX_RING_SELECTION_CFG_RX_MSDU_START_OFFSET GENMASK(31, 16) +#define HTT_RX_RING_SELECTION_CFG_RX_ATTENTION_OFFSET GENMASK(15, 0) enum htt_rx_filter_tlv_flags { HTT_RX_FILTER_TLV_FLAGS_MPDU_START = BIT(0), @@ -13132,6 +13198,15 @@ struct htt_rx_ring_selection_cfg_cmd { uint32_t pkt_type_en_flags2; uint32_t pkt_type_en_flags3; uint32_t rx_filter_tlv; + uint32_t rx_packet_offset; /* packet & header offsets */ + uint32_t rx_mpdu_offset; /* mpdu start & end offsets */ + uint32_t rx_msdu_offset; /* msdu start & end offsets */ + uint32_t rx_attn_offset; /* attention TLV offset */ + uint32_t info2; + uint32_t reserved[2]; + uint32_t rx_mpdu_start_end_mask; + uint32_t rx_msdu_end_word_mask; + uint32_t info3; } __packed; struct htt_rx_ring_tlv_filter { @@ -13140,6 +13215,14 @@ struct htt_rx_ring_tlv_filter { uint32_t pkt_filter_flags1; /* MGMT */ uint32_t pkt_filter_flags2; /* CTRL */ uint32_t pkt_filter_flags3; /* DATA */ + int offset_valid; + uint16_t rx_packet_offset; + uint16_t rx_header_offset; + uint16_t rx_mpdu_end_offset; + uint16_t rx_mpdu_start_offset; + uint16_t rx_msdu_end_offset; + uint16_t rx_msdu_start_offset; + uint16_t rx_attn_offset; }; #define HTT_RX_FULL_MON_MODE_CFG_CMD_INFO0_MSG_TYPE GENMASK(7, 0) Index: sys/dev/ic/qwzvar.h =================================================================== RCS file: /cvs/src/sys/dev/ic/qwzvar.h,v diff -u -p -u -p -r1.14 qwzvar.h --- sys/dev/ic/qwzvar.h 26 Apr 
2026 19:25:08 -0000 1.14 +++ sys/dev/ic/qwzvar.h 14 May 2026 14:45:55 -0000 @@ -1982,6 +1982,8 @@ struct qwz_softc { uint16_t qmi_txn_id; int qmi_cal_done; + int single_chip_mlo_support; + uint8_t qmi_phy_cap_num_phy; struct qwz_qmi_ce_cfg qmi_ce_cfg; struct qwz_qmi_target_info qmi_target; struct qwz_qmi_dev_mem_info qmi_dev_mem[ATH12K_QMI_WLFW_MAX_DEV_MEM_NUM_V01]; Index: sys/dev/pci/if_qwz_pci.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_qwz_pci.c,v diff -u -p -u -p -r1.8 if_qwz_pci.c --- sys/dev/pci/if_qwz_pci.c 26 Apr 2026 19:25:08 -0000 1.8 +++ sys/dev/pci/if_qwz_pci.c 14 May 2026 14:45:55 -0000 @@ -2886,6 +2886,7 @@ qwz_mhi_fw_load_handler(struct qwz_pci_s u_char *data; size_t len; + amss_path[0] = '\0'; if (sc->fw_img[QWZ_FW_AMSS].data) { data = sc->fw_img[QWZ_FW_AMSS].data; len = sc->fw_img[QWZ_FW_AMSS].size; @@ -2916,16 +2917,40 @@ qwz_mhi_fw_load_handler(struct qwz_pci_s /* Second-stage boot loader sits in the first 512 KB of image. */ ret = qwz_mhi_fw_load_bhi(psc, data, MHI_DMA_VEC_CHUNK_SIZE); if (ret != 0) { - printf("%s: could not load firmware %s\n", - sc->sc_dev.dv_xname, amss_path); + printf("%s: could not load firmware %s (BHI ret=%d)\n", + sc->sc_dev.dv_xname, amss_path, ret); return ret; } - /* Now load the full image. */ + /* + * Mirror Linux's MHI state-worker ordering: wait for the chip + * to reach SBL EE + M0 before starting the BHIE upload. 
+ */ + while (psc->bhi_ee < MHI_EE_SBL) { + ret = tsleep_nsec(&psc->bhi_ee, 0, "qwzsbl", + SEC_TO_NSEC(5)); + if (ret) { + printf("%s: timeout waiting for SBL EE (bhi_ee=%d)\n", + sc->sc_dev.dv_xname, psc->bhi_ee); + return ret; + } + } + + while (psc->mhi_state != MHI_STATE_M0) { + ret = tsleep_nsec(&psc->mhi_state, 0, "qwzm0", + SEC_TO_NSEC(5)); + if (ret) { + printf("%s: timeout waiting for M0 state " + "(mhi_state=0x%x)\n", + sc->sc_dev.dv_xname, psc->mhi_state); + return ret; + } + } + ret = qwz_mhi_fw_load_bhie(psc, data, len); if (ret != 0) { - printf("%s: could not load firmware %s\n", - sc->sc_dev.dv_xname, amss_path); + printf("%s: could not load firmware %s (BHIE ret=%d)\n", + sc->sc_dev.dv_xname, amss_path, ret); return ret; } @@ -3143,6 +3168,18 @@ qwz_mhi_fw_load_bhi(struct qwz_pci_softc /* Copy firmware image to DMA memory. */ memcpy(QWZ_DMA_KVA(data_adm), data, len); + /* + * Even though the buffer was mapped with BUS_DMA_COHERENT, force + * a PREWRITE sync so any pending CPU stores reach DRAM before the + * device DMAs the firmware bytes. Every other DMA buffer in qwz + * does this -- BHI/BHIE were the exceptions. If the FW computes + * an internal hash over what it reads and a few bytes are stale, + * dlpager fails its TPZ authentication and the entire pager + * subsystem refuses to come up post-AMSS. + */ + bus_dmamap_sync(sc->sc_dmat, QWZ_DMA_MAP(data_adm), 0, len, + BUS_DMASYNC_PREWRITE); + qwz_pci_write(sc, psc->bhi_off + MHI_BHI_STATUS, 0); /* Set data physical address and length. */ @@ -3239,6 +3276,19 @@ qwz_mhi_fw_load_bhie(struct qwz_pci_soft } else vec[i].size = remain; } + + /* + * PREWRITE sync both buffers before the device starts DMAing. + * BUS_DMA_COHERENT mappings still need this on ARM64 to flush + * any pending CPU stores to DRAM. See same rationale in + * qwz_mhi_fw_load_bhi -- the FW image is hash-validated by + * dlpager TPZ, and even a few stale bytes cause authentication + * to fail and the swap region to never come up. 
+ */ + bus_dmamap_sync(sc->sc_dmat, QWZ_DMA_MAP(psc->amss_data), 0, len, + BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(sc->sc_dmat, QWZ_DMA_MAP(psc->amss_vec), 0, vec_size, + BUS_DMASYNC_PREWRITE); /* Set vector physical address and length. */ paddr = QWZ_DMA_DVA(psc->amss_vec);