From: Dave Voutila
Subject: Re: teaching vmd virtio 1.2
To: tech@openbsd.org
Cc: sf@openbsd.org, mlarkin@openbsd.org
Date: Thu, 31 Jul 2025 05:49:31 -0400

Dave Voutila writes:

> Testers wanted (who's running an archaic Linux guest?), but also looking
> for ok's.
>
> tl;dr: updates vmd's virtio implementation to support virtio 1.2 and
> upgrades vioscsi and viornd to use the new 1.x implementation. In doing
> so, it cleans up core virtqueue handling for devices still in the virtio
> 0.9 dark ages.
>
> The diff below has already seen testing in a previous form back in June,
> but I've worked to include some critical feedback from sf@ related to
> the virtio 1.x spec:
>
> - adjusts queue size and masks to be dynamic for v1.x devices (caps
>   max queue size at IOV_MAX) and adds power-of-2 checks
> - adds pci revision support and sets the v1.x devices to rev 1 to
>   report they are non-transitional devices and want only the v1.x
>   protocol
> - cleans up the virtqueue reset handling across all devices to use a
>   common function
>
> These changes warrant some more tire kicking and scrutiny.
>
> Hoping to get this into the tree early next week if people have time to
> do a final review and testing. Next step is transitioning the remaining
> devices (net, block, vmmci).
>

I had ample time on a flight and the first day of the hackathon, so
here's a diff that also migrates the virtio block and network devices.
The only device left to migrate is vmmci.

The previous diff received positive test reports, and this one just
builds on it. I can either carve the vioblk and vionet changes out of
this diff and commit them separately, or commit everything at once.

OKs or feedback?

diffstat refs/heads/master refs/heads/virtio-1.2-uber
 M  usr.sbin/vmd/mc146818.c  |    1+    1-
 M  usr.sbin/vmd/pci.c       |   55+    9-
 M  usr.sbin/vmd/pci.h       |   16+    1-
 M  usr.sbin/vmd/vioblk.c    |  104+  254-
 M  usr.sbin/vmd/vionet.c    |  413+  190-
 M  usr.sbin/vmd/vioscsi.c   |  359+  599-
 M  usr.sbin/vmd/virtio.c    |  837+  299-
 M  usr.sbin/vmd/virtio.h    |  131+   92-
 M  usr.sbin/vmd/vm.c        |    2+    2-
 M  usr.sbin/vmd/vmd.h       |    3+    0-

10 files changed, 1921 insertions(+), 1447 deletions(-)

diff refs/heads/master refs/heads/virtio-1.2-uber
commit - 788294299689adc0a6c392611e2b1f3c1288bdd5
commit + 52df8a6fe59487f743b8d5f5c80a4ff2adf0e0ba
blob - 2c6b76c6c8387579b7c09672c1fcc34860c7593f
blob + 50e20c4646c71a4de6eeded38d458e5814c7dd1b
--- usr.sbin/vmd/mc146818.c
+++ usr.sbin/vmd/mc146818.c
@@ -127,7 +127,7 @@ rtc_fire1(int fd, short type, void *arg)
 	if (rtc.now - old > 5) {
 		log_debug("%s: RTC clock drift (%llds), requesting guest "
 		    "resync", __func__, (rtc.now - old));
-		vmmci_ctl(VMMCI_SYNCRTC);
+		vmmci_ctl(&vmmci, VMMCI_SYNCRTC);
 	}
 	evtimer_add(&rtc.sec, &rtc.sec_tv);
 }
blob - ef5bbeb94c3e1345147078952303d429b8a9de52
blob + 242397ad4aa9a9e54ec2d1dd049f1247bb858c03
--- usr.sbin/vmd/pci.c
+++ usr.sbin/vmd/pci.c
@@ -22,6 +22,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -54,7 +55,7 @@ const uint8_t pci_pic_irqs[PCI_MAX_PIC_IRQS] = {3, 5,
  * barfn: callback function invoked on BAR access
  * cookie: cookie passed to barfn on access
  *
- * Returns 0 if the BAR was added successfully, 1 otherwise.
+ * Returns the index of the BAR if added successfully, -1 otherwise.
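 *
 * A hedged usage sketch (the caller shown is illustrative, not taken
 * from this diff): device attach code can now capture the BAR index,
 * e.g. to point a virtio PCI capability at the right BAR later:
 *
 *	int bar_idx = pci_add_bar(id, PCI_MAPREG_TYPE_MEM, barfn, cookie);
 *	if (bar_idx == -1)
 *		log_warnx("%s: out of BARs", __func__);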
*/ int pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie) @@ -63,18 +64,18 @@ pci_add_bar(uint8_t id, uint32_t type, void *barfn, vo /* Check id */ if (id >= pci.pci_dev_ct) - return (1); + return (-1); /* Can only add PCI_MAX_BARS BARs to any device */ bar_ct = pci.pci_devices[id].pd_bar_ct; if (bar_ct >= PCI_MAX_BARS) - return (1); + return (-1); /* Compute BAR address and add */ bar_reg_idx = (PCI_MAPREG_START + (bar_ct * 4)) / 4; if (type == PCI_MAPREG_TYPE_MEM) { if (pci.pci_next_mmio_bar >= PCI_MMIO_BAR_END) - return (1); + return (-1); pci.pci_devices[id].pd_cfg_space[bar_reg_idx] = PCI_MAPREG_MEM_ADDR(pci.pci_next_mmio_bar); @@ -88,7 +89,7 @@ pci_add_bar(uint8_t id, uint32_t type, void *barfn, vo #ifdef __amd64__ else if (type == PCI_MAPREG_TYPE_IO) { if (pci.pci_next_io_bar >= VM_PCI_IO_BAR_END) - return (1); + return (-1); pci.pci_devices[id].pd_cfg_space[bar_reg_idx] = PCI_MAPREG_IO_ADDR(pci.pci_next_io_bar) | @@ -104,7 +105,7 @@ pci_add_bar(uint8_t id, uint32_t type, void *barfn, vo } #endif /* __amd64__ */ - return (0); + return ((int)bar_ct); } int @@ -156,6 +157,7 @@ pci_get_dev_irq(uint8_t id) * subclass: PCI 'subclass' of the new device * subsys_vid: subsystem VID of the new device * subsys_id: subsystem ID of the new device + * rev_id: revision id * irq_needed: 1 if an IRQ should be assigned to this PCI device, 0 otherwise * csfunc: PCI config space callback function when the guest VM accesses * CS of this PCI device @@ -167,7 +169,7 @@ pci_get_dev_irq(uint8_t id) int pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class, uint8_t subclass, uint16_t subsys_vid, uint16_t subsys_id, - uint8_t irq_needed, pci_cs_fn_t csfunc) + uint8_t rev_id, uint8_t irq_needed, pci_cs_fn_t csfunc) { /* Exceeded max devices? */ if (pci.pci_dev_ct >= PCI_CONFIG_MAX_DEV) @@ -182,6 +184,7 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid pci.pci_devices[*id].pd_vid = vid; pci.pci_devices[*id].pd_did = pid; + pci.pci_devices[*id].pd_rev = rev_id; pci.pci_devices[*id].pd_class = class; pci.pci_devices[*id].pd_subclass = subclass; pci.pci_devices[*id].pd_subsys_vid = subsys_vid; @@ -204,6 +207,34 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid return (0); } +int +pci_add_capability(uint8_t id, struct pci_cap *cap) +{ + uint8_t cid; + struct pci_dev *dev = NULL; + + if (id >= pci.pci_dev_ct) + return (-1); + dev = &pci.pci_devices[id]; + + if (dev->pd_cap_ct >= PCI_MAX_CAPS) + return (-1); + cid = dev->pd_cap_ct; + + memcpy(&dev->pd_caps[cid], cap, sizeof(dev->pd_caps[0])); + + /* Update the linkage. */ + if (cid > 0) + dev->pd_caps[cid - 1].pc_next = (sizeof(struct pci_cap) * cid) + + offsetof(struct pci_dev, pd_caps); + + dev->pd_cap_ct++; + dev->pd_cap = offsetof(struct pci_dev, pd_caps); + dev->pd_status |= (PCI_STATUS_CAPLIST_SUPPORT >> 16); + + return (cid); +} + /* * pci_init * @@ -216,15 +247,18 @@ pci_init(void) uint8_t id; memset(&pci, 0, sizeof(pci)); + + /* Check if changes to struct pci_dev create an invalid config space. 
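	 *
	 * Back-of-the-envelope, from the definitions in pci.h: the
	 * standard header fields occupy the first 64 bytes, and pd_caps
	 * adds PCI_MAX_CAPS (8) * sizeof(struct pci_cap) (24 bytes,
	 * packed) = 192 bytes, landing exactly at the 256-byte legacy
	 * config space limit checked here.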
*/ + CTASSERT(sizeof(pci.pci_devices[0].pd_cfg_space) <= 256); + pci.pci_next_mmio_bar = PCI_MMIO_BAR_BASE; - #ifdef __amd64__ pci.pci_next_io_bar = VM_PCI_IO_BAR_BASE; #endif /* __amd64__ */ if (pci_add_device(&id, PCI_VENDOR_OPENBSD, PCI_PRODUCT_OPENBSD_PCHB, PCI_CLASS_BRIDGE, PCI_SUBCLASS_BRIDGE_HOST, - PCI_VENDOR_OPENBSD, 0, 0, NULL)) { + PCI_VENDOR_OPENBSD, 0, 0, 0, NULL)) { log_warnx("%s: can't add PCI host bridge", __progname); return; } @@ -442,3 +476,15 @@ pci_find_first_device(uint16_t subsys_id) return (i); return (-1); } + +/* + * Retrieve the subsystem identifier for a PCI device if found, otherwise 0. + */ +uint16_t +pci_get_subsys_id(uint8_t pci_id) +{ + if (pci_id >= pci.pci_dev_ct) + return (0); + else + return (pci.pci_devices[pci_id].pd_subsys_id); +} blob - bb874674aafa7d26bfd790d182f45daf4d5cb014 blob + 1d417572421db8b189ef85b89ab6bf74305db8d2 --- usr.sbin/vmd/pci.h +++ usr.sbin/vmd/pci.h @@ -30,6 +30,7 @@ #define PCI_MODE1_DATA_REG 0x0cfc #define PCI_CONFIG_MAX_DEV 32 #define PCI_MAX_BARS 6 +#define PCI_MAX_CAPS 8 #define PCI_BAR_TYPE_IO 0x0 #define PCI_BAR_TYPE_MMIO 0x1 @@ -44,6 +45,15 @@ typedef int (*pci_iobar_fn_t)(int dir, uint16_t reg, u void *, uint8_t); typedef int (*pci_mmiobar_fn_t)(int dir, uint32_t ofs, uint32_t *data); +/* + * Represents a PCI Capability entry with enough space for the virtio-specific + * capabilities. + */ +struct pci_cap { + uint8_t pc_vndr; /* Vendor-specific ID */ + uint8_t pc_next; /* Link to next capability */ + uint8_t pc_extra[22]; /* Enough space for Virtio PCI data. */ +} __packed; struct pci_dev { union { @@ -73,9 +83,12 @@ struct pci_dev { uint8_t pd_int; uint8_t pd_min_grant; uint8_t pd_max_grant; + struct pci_cap pd_caps[PCI_MAX_CAPS]; } __packed; }; + uint8_t pd_bar_ct; + uint8_t pd_cap_ct; pci_cs_fn_t pd_csfunc; uint8_t pd_bartype[PCI_MAX_BARS]; @@ -97,10 +110,12 @@ struct pci { int pci_find_first_device(uint16_t); void pci_init(void); int pci_add_device(uint8_t *, uint16_t, uint16_t, uint8_t, uint8_t, uint16_t, - uint16_t, uint8_t, pci_cs_fn_t); + uint16_t, uint8_t, uint8_t, pci_cs_fn_t); +int pci_add_capability(uint8_t, struct pci_cap *); int pci_add_bar(uint8_t, uint32_t, void *, void *); int pci_set_bar_fn(uint8_t, uint8_t, void *, void *); uint8_t pci_get_dev_irq(uint8_t); +uint16_t pci_get_subsys_id(uint8_t); #ifdef __amd64__ void pci_handle_address_reg(struct vm_run_params *); blob - a2da2897ae2f9a3c88609ef22c16628148bbb2b3 blob + ebd63a04cd71c00a681c8c9175dc0af05b552b95 --- usr.sbin/vmd/vioblk.c +++ usr.sbin/vmd/vioblk.c @@ -35,18 +35,16 @@ extern char *__progname; extern struct vmd_vm *current_vm; -struct iovec io_v[VIOBLK_QUEUE_SIZE]; +struct iovec io_v[VIRTIO_QUEUE_SIZE_MAX]; static const char *disk_type(int); -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *, - int8_t *); -static int handle_io_write(struct viodev_msg *, struct virtio_dev *); +static uint32_t vio1_read(struct virtio_dev *, struct viodev_msg *, int *); +static int vio1_write(struct virtio_dev *, struct viodev_msg *); +static uint32_t vioblk_dev_read(struct virtio_dev *, struct viodev_msg *); -static void vioblk_update_qs(struct vioblk_dev *); -static void vioblk_update_qa(struct vioblk_dev *); -static int vioblk_notifyq(struct vioblk_dev *); -static ssize_t vioblk_rw(struct vioblk_dev *, int, off_t, - struct vring_desc *, struct vring_desc **); +static int vioblk_notifyq(struct virtio_dev *, uint16_t); +static ssize_t vioblk_rw(struct vioblk_dev *, struct virtio_vq_info *, int, + off_t, struct vring_desc *, struct 
vring_desc **); static void dev_dispatch_vm(int, short, void *); static void handle_sync_io(int, short, void *); @@ -243,43 +241,6 @@ vioblk_cmd_name(uint32_t type) } } -static void -vioblk_update_qa(struct vioblk_dev *dev) -{ - struct virtio_vq_info *vq_info; - void *hva = NULL; - - /* Invalid queue? */ - if (dev->cfg.queue_select > 0) - return; - - vq_info = &dev->vq[dev->cfg.queue_select]; - vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; - - hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE)); - if (hva == NULL) - fatal("vioblk_update_qa"); - vq_info->q_hva = hva; -} - -static void -vioblk_update_qs(struct vioblk_dev *dev) -{ - struct virtio_vq_info *vq_info; - - /* Invalid queue? */ - if (dev->cfg.queue_select > 0) { - dev->cfg.queue_size = 0; - return; - } - - vq_info = &dev->vq[dev->cfg.queue_select]; - - /* Update queue pfn/size based on queue select */ - dev->cfg.queue_pfn = vq_info->q_gpa >> 12; - dev->cfg.queue_size = vq_info->qs; -} - /* * Process virtqueue notifications. If an unrecoverable error occurs, puts * device into a "needs reset" state. @@ -287,26 +248,28 @@ vioblk_update_qs(struct vioblk_dev *dev) * Returns 1 if an we need to assert an IRQ. */ static int -vioblk_notifyq(struct vioblk_dev *dev) +vioblk_notifyq(struct virtio_dev *dev, uint16_t vq_idx) { uint32_t cmd_len; uint16_t idx, cmd_desc_idx; uint8_t ds; off_t offset; ssize_t sz; - int is_write, notify = 0, i; + int is_write, notify = 0; char *vr; + size_t i; struct vring_desc *table, *desc; struct vring_avail *avail; struct vring_used *used; struct virtio_blk_req_hdr *cmd; struct virtio_vq_info *vq_info; + struct vioblk_dev *vioblk = &dev->vioblk; /* Invalid queue? */ - if (dev->cfg.queue_notify > 0) + if (vq_idx > dev->num_queues) return (0); - vq_info = &dev->vq[dev->cfg.queue_notify]; + vq_info = &dev->vq[vq_idx]; idx = vq_info->last_avail; vr = vq_info->q_hva; if (vr == NULL) @@ -319,7 +282,7 @@ vioblk_notifyq(struct vioblk_dev *dev) while (idx != avail->idx) { /* Retrieve Command descriptor. */ - cmd_desc_idx = avail->ring[idx & VIOBLK_QUEUE_MASK]; + cmd_desc_idx = avail->ring[idx & vq_info->mask]; desc = &table[cmd_desc_idx]; cmd_len = desc->len; @@ -342,7 +305,7 @@ vioblk_notifyq(struct vioblk_dev *dev) goto reset; /* Advance to the 2nd descriptor. */ - desc = &table[desc->next & VIOBLK_QUEUE_MASK]; + desc = &table[desc->next & vq_info->mask]; /* Process each available command & chain. */ switch (cmd->type) { @@ -351,7 +314,8 @@ vioblk_notifyq(struct vioblk_dev *dev) /* Read (IN) & Write (OUT) */ is_write = (cmd->type == VIRTIO_BLK_T_OUT) ? 1 : 0; offset = cmd->sector * VIRTIO_BLK_SECTOR_SIZE; - sz = vioblk_rw(dev, is_write, offset, table, &desc); + sz = vioblk_rw(vioblk, vq_info, is_write, offset, table, + &desc); if (sz == -1) ds = VIRTIO_BLK_S_IOERR; else @@ -376,8 +340,8 @@ vioblk_notifyq(struct vioblk_dev *dev) /* Advance to the end of the chain, if needed. */ i = 0; while (desc->flags & VRING_DESC_F_NEXT) { - desc = &table[desc->next & VIOBLK_QUEUE_MASK]; - if (++i >= VIOBLK_QUEUE_SIZE) { + desc = &table[desc->next & vq_info->mask]; + if (++i >= vq_info->qs) { /* * If we encounter an infinite/looping chain, * not much we can do but say we need a reset. 
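A note on the masked ring indexing above: vq_info->mask only reduces
ring indices correctly when the queue size is a power of two, i.e.
mask == qs - 1. A minimal sketch of the power-of-2 check mentioned in
the cover letter, using field names from this diff (the check itself
is illustrative, not lifted from the code):

	/* Illustrative: reject queue sizes that aren't a power of two. */
	if (qs == 0 || (qs & (qs - 1)) != 0)
		return (-1);
	vq_info->qs = qs;
	vq_info->mask = qs - 1;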
@@ -398,11 +362,11 @@ vioblk_notifyq(struct vioblk_dev *dev) log_warnx("%s: can't write device status data " "@ 0x%llx",__func__, desc->addr); - dev->cfg.isr_status |= 1; + dev->isr |= 1; notify = 1; - used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; - used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_len; + used->ring[used->idx & vq_info->mask].id = cmd_desc_idx; + used->ring[used->idx & vq_info->mask].len = cmd_len; __sync_synchronize(); used->idx++; @@ -417,8 +381,8 @@ reset: * When setting the "needs reset" flag, the driver is notified * via a configuration change interrupt. */ - dev->cfg.device_status |= DEVICE_NEEDS_RESET; - dev->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE; + dev->status |= DEVICE_NEEDS_RESET; + dev->isr |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE; return (1); } @@ -498,7 +462,7 @@ handle_sync_io(int fd, short event, void *arg) struct viodev_msg msg; struct imsg imsg; ssize_t n; - int8_t intr = INTR_STATE_NOOP; + int deassert = 0; if (event & EV_READ) { if ((n = imsgbuf_read(ibuf)) == -1) @@ -538,15 +502,18 @@ handle_sync_io(int fd, short event, void *arg) switch (msg.type) { case VIODEV_MSG_IO_READ: /* Read IO: make sure to send a reply */ - msg.data = handle_io_read(&msg, dev, &intr); + msg.data = vio1_read(dev, &msg, &deassert); msg.data_valid = 1; - msg.state = intr; + if (deassert) { + /* Inline any interrupt deassertions. */ + msg.state = INTR_STATE_DEASSERT; + } imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, sizeof(msg)); break; case VIODEV_MSG_IO_WRITE: - /* Write IO: no reply needed */ - if (handle_io_write(&msg, dev) == 1) + /* Write IO: no reply needed, but maybe an irq assert */ + if (vio1_write(dev, &msg)) virtio_assert_irq(dev, 0); break; case VIODEV_MSG_SHUTDOWN: @@ -561,223 +528,106 @@ handle_sync_io(int fd, short event, void *arg) } static int -handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev) +vio1_write(struct virtio_dev *dev, struct viodev_msg *msg) { - struct vioblk_dev *vioblk = &dev->vioblk; uint32_t data = msg->data; + uint16_t reg = msg->reg; + uint8_t sz = msg->io_sz; int intr = 0; - switch (msg->reg) { - case VIRTIO_CONFIG_DEVICE_FEATURES: - case VIRTIO_CONFIG_QUEUE_SIZE: - case VIRTIO_CONFIG_ISR_STATUS: - log_warnx("%s: illegal write %x to %s", __progname, data, - virtio_reg_name(msg->reg)); + switch (reg & 0xFF00) { + case VIO1_CFG_BAR_OFFSET: + (void)virtio_io_cfg(dev, VEI_DIR_OUT, (reg & 0x00FF), data, sz); break; - case VIRTIO_CONFIG_GUEST_FEATURES: - vioblk->cfg.guest_feature = data; + case VIO1_DEV_BAR_OFFSET: + /* Ignore all writes to device configuration registers. */ break; - case VIRTIO_CONFIG_QUEUE_PFN: - vioblk->cfg.queue_pfn = data; - vioblk_update_qa(vioblk); + case VIO1_NOTIFY_BAR_OFFSET: + intr = vioblk_notifyq(dev, (uint16_t)(msg->data)); break; - case VIRTIO_CONFIG_QUEUE_SELECT: - vioblk->cfg.queue_select = data; - vioblk_update_qs(vioblk); + case VIO1_ISR_BAR_OFFSET: + /* Ignore writes to ISR. */ break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - /* XXX We should be stricter about status checks. 
*/ - if (!(vioblk->cfg.device_status & DEVICE_NEEDS_RESET)) { - vioblk->cfg.queue_notify = data; - if (vioblk_notifyq(vioblk)) - intr = 1; - } - break; - case VIRTIO_CONFIG_DEVICE_STATUS: - vioblk->cfg.device_status = data; - if (vioblk->cfg.device_status == 0) { - vioblk->cfg.guest_feature = 0; - vioblk->cfg.queue_pfn = 0; - vioblk_update_qa(vioblk); - vioblk->cfg.queue_size = 0; - vioblk_update_qs(vioblk); - vioblk->cfg.queue_select = 0; - vioblk->cfg.queue_notify = 0; - vioblk->cfg.isr_status = 0; - vioblk->vq[0].last_avail = 0; - vioblk->vq[0].notified_avail = 0; - virtio_deassert_irq(dev, msg->vcpu); - } - break; default: - break; + log_debug("%s: no handler for reg 0x%04x", __func__, reg); } + return (intr); } static uint32_t -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr) +vio1_read(struct virtio_dev *dev, struct viodev_msg *msg, int *deassert) { - struct vioblk_dev *vioblk = &dev->vioblk; + uint32_t data = (uint32_t)(-1); + uint16_t reg = msg->reg; uint8_t sz = msg->io_sz; - uint32_t data; - if (msg->data_valid) - data = msg->data; - else - data = 0; - - switch (msg->reg) { - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: - switch (sz) { - case 4: - data = (uint32_t)(vioblk->capacity); - break; - case 2: - data &= 0xFFFF0000; - data |= (uint32_t)(vioblk->capacity) & 0xFFFF; - break; - case 1: - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity) & 0xFF; - break; - } - /* XXX handle invalid sz */ + switch (reg & 0xFF00) { + case VIO1_CFG_BAR_OFFSET: + data = virtio_io_cfg(dev, VEI_DIR_IN, (uint8_t)reg, 0, sz); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 8) & 0xFF; - } - /* XXX handle invalid sz */ + case VIO1_DEV_BAR_OFFSET: + data = vioblk_dev_read(dev, msg); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 16) & 0xFF; - } else if (sz == 2) { - data &= 0xFFFF0000; - data |= (uint32_t)(vioblk->capacity >> 16) & 0xFFFF; - } - /* XXX handle invalid sz */ + case VIO1_NOTIFY_BAR_OFFSET: + /* Reads of notify register return all 1's. 
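	 * (The notify window is effectively write-only from the
	 * driver's side; all-ones matches what reads of unclaimed
	 * I/O space return.)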
*/ break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 24) & 0xFF; - } - /* XXX handle invalid sz */ + case VIO1_ISR_BAR_OFFSET: + data = dev->isr; + dev->isr = 0; + *deassert = 1; break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: - switch (sz) { - case 4: - data = (uint32_t)(vioblk->capacity >> 32); + default: + log_debug("%s: no handler for reg 0x%04x", __func__, reg); + } + + return (data); +} + +static uint32_t +vioblk_dev_read(struct virtio_dev *dev, struct viodev_msg *msg) +{ + struct vioblk_dev *vioblk = (struct vioblk_dev *)&dev->vioblk; + uint32_t data = (uint32_t)(-1); + uint16_t reg = msg->reg; + uint8_t sz = msg->io_sz; + + switch (reg & 0xFF) { + case VIRTIO_BLK_CONFIG_CAPACITY: + if (sz != 4) { + log_warnx("%s: unaligned read from capacity register", + __func__); break; - case 2: - data &= 0xFFFF0000; - data |= (uint32_t)(vioblk->capacity >> 32) & 0xFFFF; - break; - case 1: - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 32) & 0xFF; - break; } - /* XXX handle invalid sz */ + data = (uint32_t)(0xFFFFFFFF & vioblk->capacity); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 40) & 0xFF; + case VIRTIO_BLK_CONFIG_CAPACITY + 4: + if (sz != 4) { + log_warnx("%s: unaligned read from capacity register", + __func__); + break; } - /* XXX handle invalid sz */ + data = (uint32_t)(vioblk->capacity >> 32); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 48) & 0xFF; - } else if (sz == 2) { - data &= 0xFFFF0000; - data |= (uint32_t)(vioblk->capacity >> 48) & 0xFFFF; - } - /* XXX handle invalid sz */ - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->capacity >> 56) & 0xFF; - } - /* XXX handle invalid sz */ - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: - switch (sz) { - case 4: - data = (uint32_t)(vioblk->seg_max); + case VIRTIO_BLK_CONFIG_SEG_MAX: + if (sz != 4) { + log_warnx("%s: unaligned read from segment max " + "register", __func__); break; - case 2: - data &= 0xFFFF0000; - data |= (uint32_t)(vioblk->seg_max) & 0xFFFF; - break; - case 1: - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->seg_max) & 0xFF; - break; } - /* XXX handle invalid sz */ + data = vioblk->seg_max; break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 13: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->seg_max >> 8) & 0xFF; - } - /* XXX handle invalid sz */ + case VIRTIO_BLK_CONFIG_GEOMETRY_C: + case VIRTIO_BLK_CONFIG_GEOMETRY_H: + case VIRTIO_BLK_CONFIG_GEOMETRY_S: + /* + * SeaBIOS unconditionally reads without checking the + * geometry feature flag. 
+ */ break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 14: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->seg_max >> 16) & 0xFF; - } else if (sz == 2) { - data &= 0xFFFF0000; - data |= (uint32_t)(vioblk->seg_max >> 16) - & 0xFFFF; - } - /* XXX handle invalid sz */ - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 15: - if (sz == 1) { - data &= 0xFFFFFF00; - data |= (uint32_t)(vioblk->seg_max >> 24) & 0xFF; - } - /* XXX handle invalid sz */ - break; - case VIRTIO_CONFIG_DEVICE_FEATURES: - data = vioblk->cfg.device_feature; - break; - case VIRTIO_CONFIG_GUEST_FEATURES: - data = vioblk->cfg.guest_feature; - break; - case VIRTIO_CONFIG_QUEUE_PFN: - data = vioblk->cfg.queue_pfn; - break; - case VIRTIO_CONFIG_QUEUE_SIZE: - data = vioblk->cfg.queue_size; - break; - case VIRTIO_CONFIG_QUEUE_SELECT: - data = vioblk->cfg.queue_select; - break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - data = vioblk->cfg.queue_notify; - break; - case VIRTIO_CONFIG_DEVICE_STATUS: - data = vioblk->cfg.device_status; - break; - case VIRTIO_CONFIG_ISR_STATUS: - data = vioblk->cfg.isr_status; - vioblk->cfg.isr_status = 0; - if (intr != NULL) - *intr = INTR_STATE_DEASSERT; - break; default: - return (0xFFFFFFFF); + log_warnx("%s: invalid register 0x%04x", __func__, reg); + return (uint32_t)(-1); } return (data); @@ -791,8 +641,8 @@ handle_io_read(struct viodev_msg *msg, struct virtio_d * On error, returns -1 and descriptor (desc) remains at its current position. */ static ssize_t -vioblk_rw(struct vioblk_dev *dev, int is_write, off_t offset, - struct vring_desc *desc_tbl, struct vring_desc **desc) +vioblk_rw(struct vioblk_dev *dev, struct virtio_vq_info *vq_info, int is_write, + off_t offset, struct vring_desc *desc_tbl, struct vring_desc **desc) { struct iovec *iov = NULL; ssize_t sz = 0; @@ -830,7 +680,7 @@ vioblk_rw(struct vioblk_dev *dev, int is_write, off_t } /* Advance to the next descriptor. */ - *desc = &desc_tbl[(*desc)->next & VIOBLK_QUEUE_MASK]; + *desc = &desc_tbl[(*desc)->next & vq_info->mask]; } while ((*desc)->flags & VRING_DESC_F_NEXT); /* blob - b55efb57613e670d9160a3d063b77ab2d4c384c0 blob + 8c6406fc030a66f82ce5478e0069b28c75b6db88 --- usr.sbin/vmd/vionet.c +++ usr.sbin/vmd/vionet.c @@ -38,6 +38,18 @@ #include "virtio.h" #include "vmd.h" +#define VIONET_DEBUG 1 +#ifdef DPRINTF +#undef DPRINTF +#endif +#if VIONET_DEBUG +#define DPRINTF log_debug +#else +#define DPRINTF(x...) 
do {} while(0) +#endif /* VIONET_DEBUG */ + +#define VIRTIO_NET_CONFIG_MAC 0 /* 8 bit x 6 byte */ + #define VIRTIO_NET_F_MAC (1 << 5) #define RXQ 0 #define TXQ 1 @@ -52,17 +64,20 @@ struct packet { static void *rx_run_loop(void *); static void *tx_run_loop(void *); -static int vionet_rx(struct vionet_dev *, int); +static int vionet_rx(struct virtio_dev *, int); static ssize_t vionet_rx_copy(struct vionet_dev *, int, const struct iovec *, int, size_t); static ssize_t vionet_rx_zerocopy(struct vionet_dev *, int, const struct iovec *, int); static void vionet_rx_event(int, short, void *); -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *, - int8_t *); -static void handle_io_write(struct viodev_msg *, struct virtio_dev *); +static uint32_t vio1_read(struct virtio_dev *, struct viodev_msg *, int *); +static void vio1_write(struct virtio_dev *, struct viodev_msg *); +static uint32_t vionet_cfg_read(struct virtio_dev *, struct viodev_msg *); +static void vionet_cfg_write(struct virtio_dev *, struct viodev_msg *); + static int vionet_tx(struct virtio_dev *); -static void vionet_notifyq(struct virtio_dev *); +static void vionet_notifyq(struct virtio_dev *, uint16_t); +static uint32_t vionet_dev_read(struct virtio_dev *, struct viodev_msg *); static void dev_dispatch_vm(int, short, void *); static void handle_sync_io(int, short, void *); static void read_pipe_main(int, short, void *); @@ -85,8 +100,8 @@ struct vm_dev_pipe pipe_tx; int pipe_inject[2]; #define READ 0 #define WRITE 1 -struct iovec iov_rx[VIONET_QUEUE_SIZE]; -struct iovec iov_tx[VIONET_QUEUE_SIZE]; +struct iovec iov_rx[VIRTIO_QUEUE_SIZE_MAX]; +struct iovec iov_tx[VIRTIO_QUEUE_SIZE_MAX]; pthread_rwlock_t lock = NULL; /* Guards device config state. */ int resetting = 0; /* Transient reset state used to coordinate reset. */ int rx_enabled = 0; /* 1: we expect to read the tap, 0: wait for notify. */ @@ -287,56 +302,6 @@ fail: } /* - * Update the gpa and hva of the virtqueue. - */ -static void -vionet_update_qa(struct vionet_dev *dev) -{ - struct virtio_vq_info *vq_info; - void *hva = NULL; - - /* Invalid queue? */ - if (dev->cfg.queue_select > 1) - return; - - vq_info = &dev->vq[dev->cfg.queue_select]; - vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; - dev->cfg.queue_pfn = vq_info->q_gpa >> 12; - - if (vq_info->q_gpa == 0) - vq_info->q_hva = NULL; - - hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE)); - if (hva == NULL) - fatalx("%s: hva == NULL", __func__); - - vq_info->q_hva = hva; -} - -/* - * Update the queue size. - */ -static void -vionet_update_qs(struct vionet_dev *dev) -{ - struct virtio_vq_info *vq_info; - - /* Invalid queue? */ - if (dev->cfg.queue_select > 1) { - log_warnx("%s: !!! invalid queue selector %d", __func__, - dev->cfg.queue_select); - dev->cfg.queue_size = 0; - return; - } - - vq_info = &dev->vq[dev->cfg.queue_select]; - - /* Update queue pfn/size based on queue select */ - dev->cfg.queue_pfn = vq_info->q_gpa >> 12; - dev->cfg.queue_size = vq_info->qs; -} - -/* * vionet_rx * * Pull packet from the provided fd and fill the receive-side virtqueue. We @@ -346,21 +311,23 @@ vionet_update_qs(struct vionet_dev *dev) * or 0 if no notification is needed. 
*/ static int -vionet_rx(struct vionet_dev *dev, int fd) +vionet_rx(struct virtio_dev *dev, int fd) { uint16_t idx, hdr_idx; char *vr = NULL; size_t chain_len = 0, iov_cnt; + struct vionet_dev *vionet = &dev->vionet; struct vring_desc *desc, *table; struct vring_avail *avail; struct vring_used *used; + struct virtio_net_hdr *hdr = NULL; struct virtio_vq_info *vq_info; struct iovec *iov; int notify = 0; ssize_t sz; uint8_t status = 0; - status = dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK; + status = dev->status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK; if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) { log_warnx("%s: driver not ready", __func__); return (0); @@ -379,8 +346,8 @@ vionet_rx(struct vionet_dev *dev, int fd) used->flags |= VRING_USED_F_NO_NOTIFY; while (idx != avail->idx) { - hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK]; - desc = &table[hdr_idx & VIONET_QUEUE_MASK]; + hdr_idx = avail->ring[idx & vq_info->mask]; + desc = &table[hdr_idx & vq_info->mask]; if (!DESC_WRITABLE(desc)) { log_warnx("%s: invalid descriptor state", __func__); goto reset; @@ -407,7 +374,8 @@ vionet_rx(struct vionet_dev *dev, int fd) iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len); if (iov->iov_base == NULL) goto reset; - memset(iov->iov_base, 0, sizeof(struct virtio_net_hdr)); + hdr = iov->iov_base; + memset(hdr, 0, sizeof(struct virtio_net_hdr)); /* Tweak the iovec to account for the virtio_net_hdr. */ iov->iov_len -= sizeof(struct virtio_net_hdr); @@ -422,7 +390,7 @@ vionet_rx(struct vionet_dev *dev, int fd) * and lengths. */ while (desc->flags & VRING_DESC_F_NEXT) { - desc = &table[desc->next & VIONET_QUEUE_MASK]; + desc = &table[desc->next & vq_info->mask]; if (!DESC_WRITABLE(desc)) { log_warnx("%s: invalid descriptor state", __func__); @@ -452,15 +420,17 @@ vionet_rx(struct vionet_dev *dev, int fd) goto reset; } + hdr->num_buffers = iov_cnt; + /* * If we're enforcing hardware address or handling an injected * packet, we need to use a copy-based approach. */ - if (dev->lockedmac || fd != dev->data_fd) - sz = vionet_rx_copy(dev, fd, iov_rx, iov_cnt, + if (vionet->lockedmac || fd != vionet->data_fd) + sz = vionet_rx_copy(vionet, fd, iov_rx, iov_cnt, chain_len); else - sz = vionet_rx_zerocopy(dev, fd, iov_rx, iov_cnt); + sz = vionet_rx_zerocopy(vionet, fd, iov_rx, iov_cnt); if (sz == -1) goto reset; if (sz == 0) /* No packets, so bail out for now. */ @@ -473,8 +443,8 @@ vionet_rx(struct vionet_dev *dev, int fd) sz += sizeof(struct virtio_net_hdr); /* Mark our buffers as used. */ - used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx; - used->ring[used->idx & VIONET_QUEUE_MASK].len = sz; + used->ring[used->idx & vq_info->mask].id = hdr_idx; + used->ring[used->idx & vq_info->mask].len = sz; __sync_synchronize(); used->idx++; idx++; @@ -630,14 +600,13 @@ static void vionet_rx_event(int fd, short event, void *arg) { struct virtio_dev *dev = (struct virtio_dev *)arg; - struct vionet_dev *vionet = &dev->vionet; int ret = 0; if (!(event & EV_READ)) fatalx("%s: invalid event type", __func__); pthread_rwlock_rdlock(&lock); - ret = vionet_rx(vionet, fd); + ret = vionet_rx(dev, fd); pthread_rwlock_unlock(&lock); if (ret == 0) { @@ -648,12 +617,12 @@ vionet_rx_event(int fd, short event, void *arg) pthread_rwlock_wrlock(&lock); if (ret == 1) { /* Notify the driver. */ - vionet->cfg.isr_status |= 1; + dev->isr |= 1; } else { /* Need a reset. Something went wrong. 
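	 * (Setting DEVICE_NEEDS_RESET is advertised to the driver
	 * via a configuration-change interrupt, the same convention
	 * vioblk uses above.)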
*/ log_warnx("%s: requesting device reset", __func__); - vionet->cfg.device_status |= DEVICE_NEEDS_RESET; - vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE; + dev->status |= DEVICE_NEEDS_RESET; + dev->isr |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE; } pthread_rwlock_unlock(&lock); @@ -661,11 +630,9 @@ vionet_rx_event(int fd, short event, void *arg) } static void -vionet_notifyq(struct virtio_dev *dev) +vionet_notifyq(struct virtio_dev *dev, uint16_t vq_idx) { - struct vionet_dev *vionet = &dev->vionet; - - switch (vionet->cfg.queue_notify) { + switch (vq_idx) { case RXQ: rx_enabled = 1; vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY); @@ -679,7 +646,7 @@ vionet_notifyq(struct virtio_dev *dev) * well as any bogus queue IDs. */ log_debug("%s: notify for unimplemented queue ID %d", - __func__, vionet->cfg.queue_notify); + __func__, dev->cfg.queue_notify); break; } } @@ -702,14 +669,13 @@ vionet_tx(struct virtio_dev *dev) struct packet pkt; uint8_t status = 0; - status = vionet->cfg.device_status - & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK; + status = dev->status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK; if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) { log_warnx("%s: driver not ready", __func__); return (0); } - vq_info = &vionet->vq[TXQ]; + vq_info = &dev->vq[TXQ]; idx = vq_info->last_avail; vr = vq_info->q_hva; if (vr == NULL) @@ -721,8 +687,8 @@ vionet_tx(struct virtio_dev *dev) used = (struct vring_used *)(vr + vq_info->vq_usedoffset); while (idx != avail->idx) { - hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK]; - desc = &table[hdr_idx & VIONET_QUEUE_MASK]; + hdr_idx = avail->ring[idx & vq_info->mask]; + desc = &table[hdr_idx & vq_info->mask]; if (DESC_WRITABLE(desc)) { log_warnx("%s: invalid descriptor state", __func__); goto reset; @@ -733,22 +699,24 @@ vionet_tx(struct virtio_dev *dev) chain_len = 0; /* - * As a legacy device, we most likely will receive a lead - * descriptor sized to the virtio_net_hdr. However, the framing - * is not guaranteed, so check for packet data. + * We do not negotiate VIRTIO_NET_F_HASH_REPORT so we + * assume the header length is fixed. */ - iov->iov_len = desc->len; - if (iov->iov_len < sizeof(struct virtio_net_hdr)) { + if (desc->len < sizeof(struct virtio_net_hdr)) { log_warnx("%s: invalid descriptor length", __func__); goto reset; - } else if (iov->iov_len > sizeof(struct virtio_net_hdr)) { + } + iov->iov_len = desc->len; + + if (iov->iov_len > sizeof(struct virtio_net_hdr)) { /* Chop off the virtio header, leaving packet data. */ iov->iov_len -= sizeof(struct virtio_net_hdr); - chain_len += iov->iov_len; iov->iov_base = hvaddr_mem(desc->addr + sizeof(struct virtio_net_hdr), iov->iov_len); if (iov->iov_base == NULL) goto reset; + + chain_len += iov->iov_len; iov_cnt++; } @@ -756,7 +724,7 @@ vionet_tx(struct virtio_dev *dev) * Walk the chain and collect remaining addresses and lengths. 
*/ while (desc->flags & VRING_DESC_F_NEXT) { - desc = &table[desc->next & VIONET_QUEUE_MASK]; + desc = &table[desc->next & vq_info->mask]; if (DESC_WRITABLE(desc)) { log_warnx("%s: invalid descriptor state", __func__); @@ -826,8 +794,8 @@ vionet_tx(struct virtio_dev *dev) } chain_len += sizeof(struct virtio_net_hdr); drop: - used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx; - used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len; + used->ring[used->idx & vq_info->mask].id = hdr_idx; + used->ring[used->idx & vq_info->mask].len = chain_len; __sync_synchronize(); used->idx++; idx++; @@ -848,8 +816,6 @@ drop: __func__); free(pkt.buf); } - log_debug("%s: injected dhcp reply with %ld bytes", - __func__, sz); } } @@ -857,7 +823,6 @@ drop: !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) notify = 1; - vq_info->last_avail = idx; return (notify); reset: @@ -949,7 +914,7 @@ handle_sync_io(int fd, short event, void *arg) struct viodev_msg msg; struct imsg imsg; ssize_t n; - int8_t intr = INTR_STATE_NOOP; + int deassert = 0; if (event & EV_READ) { if ((n = imsgbuf_read(ibuf)) == -1) @@ -989,15 +954,16 @@ handle_sync_io(int fd, short event, void *arg) switch (msg.type) { case VIODEV_MSG_IO_READ: /* Read IO: make sure to send a reply */ - msg.data = handle_io_read(&msg, dev, &intr); + msg.data = vio1_read(dev, &msg, &deassert); msg.data_valid = 1; - msg.state = intr; + if (deassert) + msg.state = INTR_STATE_DEASSERT; imsg_compose_event2(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, sizeof(msg), ev_base_main); break; case VIODEV_MSG_IO_WRITE: /* Write IO: no reply needed */ - handle_io_write(&msg, dev); + vio1_write(dev, &msg); break; case VIODEV_MSG_SHUTDOWN: event_del(&dev->sync_iev.ev); @@ -1010,49 +976,291 @@ handle_sync_io(int fd, short event, void *arg) imsg_event_add2(iev, ev_base_main); } +static uint32_t +vionet_cfg_read(struct virtio_dev *dev, struct viodev_msg *msg) +{ + struct virtio_pci_common_cfg *pci_cfg = &dev->pci_cfg; + uint32_t data = (uint32_t)(-1); + uint16_t reg = msg->reg & 0x00FF; + + pthread_rwlock_rdlock(&lock); + switch (reg) { + case VIO1_PCI_DEVICE_FEATURE_SELECT: + data = pci_cfg->device_feature_select; + break; + case VIO1_PCI_DEVICE_FEATURE: + if (pci_cfg->device_feature_select == 0) + data = dev->device_feature & (uint32_t)(-1); + else if (pci_cfg->device_feature_select == 1) + data = dev->device_feature >> 32; + else { + DPRINTF("%s: ignoring device feature read", + __func__); + } + break; + case VIO1_PCI_DRIVER_FEATURE_SELECT: + data = pci_cfg->driver_feature_select; + break; + case VIO1_PCI_DRIVER_FEATURE: + if (pci_cfg->driver_feature_select == 0) + data = dev->driver_feature & (uint32_t)(-1); + else if (pci_cfg->driver_feature_select == 1) + data = dev->driver_feature >> 32; + else { + DPRINTF("%s: ignoring driver feature read", + __func__); + } + break; + case VIO1_PCI_CONFIG_MSIX_VECTOR: + data = VIRTIO_MSI_NO_VECTOR; /* Unsupported */ + break; + case VIO1_PCI_NUM_QUEUES: + data = dev->num_queues; + break; + case VIO1_PCI_DEVICE_STATUS: + data = dev->status; + break; + case VIO1_PCI_CONFIG_GENERATION: + data = pci_cfg->config_generation; + break; + case VIO1_PCI_QUEUE_SELECT: + data = pci_cfg->queue_select; + break; + case VIO1_PCI_QUEUE_SIZE: + data = pci_cfg->queue_size; + break; + case VIO1_PCI_QUEUE_MSIX_VECTOR: + data = VIRTIO_MSI_NO_VECTOR; /* Unsupported */ + break; + case VIO1_PCI_QUEUE_ENABLE: + data = pci_cfg->queue_enable; + break; + case VIO1_PCI_QUEUE_NOTIFY_OFF: + data = pci_cfg->queue_notify_off; + break; + case VIO1_PCI_QUEUE_DESC: + data = 
(uint32_t)(0xFFFFFFFF & pci_cfg->queue_desc); + break; + case VIO1_PCI_QUEUE_DESC + 4: + data = (uint32_t)(pci_cfg->queue_desc >> 32); + break; + case VIO1_PCI_QUEUE_AVAIL: + data = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_avail); + break; + case VIO1_PCI_QUEUE_AVAIL + 4: + data = (uint32_t)(pci_cfg->queue_avail >> 32); + break; + case VIO1_PCI_QUEUE_USED: + data = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_used); + break; + case VIO1_PCI_QUEUE_USED + 4: + data = (uint32_t)(pci_cfg->queue_used >> 32); + break; + default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); + } + pthread_rwlock_unlock(&lock); + + return (data); +} + static void -handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev) +vionet_cfg_write(struct virtio_dev *dev, struct viodev_msg *msg) { - struct vionet_dev *vionet = &dev->vionet; - uint32_t data = msg->data; - int pause_devices = 0; + struct virtio_pci_common_cfg *pci_cfg = &dev->pci_cfg; + uint32_t data = msg->data; + uint16_t reg = msg->reg & 0xFF; + uint8_t sz = msg->io_sz; + int i, pause_devices = 0; + DPRINTF("%s: write reg=%d data=0x%x", __func__, msg->reg, data); + pthread_rwlock_wrlock(&lock); - - switch (msg->reg) { - case VIRTIO_CONFIG_DEVICE_FEATURES: - case VIRTIO_CONFIG_QUEUE_SIZE: - case VIRTIO_CONFIG_ISR_STATUS: - log_warnx("%s: illegal write %x to %s", __progname, data, - virtio_reg_name(msg->reg)); + switch (reg) { + case VIO1_PCI_DEVICE_FEATURE_SELECT: + if (sz != 4) + log_warnx("%s: unaligned write to device " + "feature select (sz=%u)", __func__, sz); + else + pci_cfg->device_feature_select = data; break; - case VIRTIO_CONFIG_GUEST_FEATURES: - vionet->cfg.guest_feature = data; + case VIO1_PCI_DEVICE_FEATURE: + log_warnx("%s: illegal write to device feature " + "register", __progname); break; - case VIRTIO_CONFIG_QUEUE_PFN: - vionet->cfg.queue_pfn = data; - vionet_update_qa(vionet); + case VIO1_PCI_DRIVER_FEATURE_SELECT: + if (sz != 4) + log_warnx("%s: unaligned write to driver " + "feature select register (sz=%u)", __func__, + sz); + else + pci_cfg->driver_feature_select = data; break; - case VIRTIO_CONFIG_QUEUE_SELECT: - vionet->cfg.queue_select = data; - vionet_update_qs(vionet); + case VIO1_PCI_DRIVER_FEATURE: + if (sz != 4) { + log_warnx("%s: unaligned write to driver " + "feature register (sz=%u)", __func__, sz); + break; + } + if (pci_cfg->driver_feature_select > 1) { + /* We only support a 64-bit feature space. */ + DPRINTF("%s: ignoring driver feature write", + __func__); + break; + } + pci_cfg->driver_feature = data; + if (pci_cfg->driver_feature_select == 0) + dev->driver_feature |= pci_cfg->driver_feature; + else + dev->driver_feature |= + ((uint64_t)pci_cfg->driver_feature << 32); + dev->driver_feature &= dev->device_feature; + DPRINTF("%s: driver features 0x%llx", __func__, + dev->driver_feature); break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - vionet->cfg.queue_notify = data; - vionet_notifyq(dev); + case VIO1_PCI_CONFIG_MSIX_VECTOR: + /* Ignore until we support MSIX. */ break; - case VIRTIO_CONFIG_DEVICE_STATUS: - if (data == 0) { - resetting = 2; /* Wait on two acks: rx & tx */ + case VIO1_PCI_NUM_QUEUES: + log_warnx("%s: illegal write to num queues register", + __progname); + break; + case VIO1_PCI_DEVICE_STATUS: + if (sz != 1) { + log_warnx("%s: unaligned write to device " + "status register (sz=%u)", __func__, sz); + break; + } + dev->status = data; + if (dev->status == 0) { + /* Reset device and virtqueues (if any). 
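			 * Per the virtio 1.x spec, a driver writes 0 to
			 * device_status to request a full reset: negotiated
			 * features, ISR state, and all virtqueue
			 * configuration return to their initial values
			 * before re-negotiation begins.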
*/ + dev->driver_feature = 0; + dev->isr = 0; + + pci_cfg->queue_select = 0; + virtio_update_qs(dev); + + if (dev->num_queues > 0) { + /* + * Reset virtqueues to initial state and + * set to disabled status. Clear PCI + * configuration registers. + */ + for (i = 0; i < dev->num_queues; i++) + virtio_vq_init(dev, i); + } + + resetting = 2; /* Wait on two acks: rx & tx */ pause_devices = 1; - } else { - // XXX is this correct? - vionet->cfg.device_status = data; } + DPRINTF("%s: dev %u status [%s%s%s%s%s%s]", __func__, + dev->pci_id, + (data & VIRTIO_CONFIG_DEVICE_STATUS_ACK) ? + "[ack]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER) ? + "[driver]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) ? + "[driver ok]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) ? + "[features ok]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) + ? "[needs reset]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_FAILED) ? + "[failed]" : ""); break; + case VIO1_PCI_CONFIG_GENERATION: + log_warnx("%s: illegal write to config generation " + "register", __progname); + break; + case VIO1_PCI_QUEUE_SELECT: + pci_cfg->queue_select = data; + virtio_update_qs(dev); + break; + case VIO1_PCI_QUEUE_SIZE: + if (data <= VIRTIO_QUEUE_SIZE_MAX) + pci_cfg->queue_size = data; + else { + log_warnx("%s: clamping queue size", __func__); + pci_cfg->queue_size = VIRTIO_QUEUE_SIZE_MAX; + } + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_MSIX_VECTOR: + /* Ignore until we support MSI-X. */ + break; + case VIO1_PCI_QUEUE_ENABLE: + pci_cfg->queue_enable = data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_NOTIFY_OFF: + log_warnx("%s: illegal write to queue notify offset " + "register", __progname); + break; + case VIO1_PCI_QUEUE_DESC: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "desc. register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_desc &= 0xffffffff00000000; + pci_cfg->queue_desc |= (uint64_t)data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_DESC + 4: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "desc. 
register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_desc &= 0x00000000ffffffff; + pci_cfg->queue_desc |= ((uint64_t)data << 32); + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_AVAIL: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "available register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_avail &= 0xffffffff00000000; + pci_cfg->queue_avail |= (uint64_t)data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_AVAIL + 4: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "available register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_avail &= 0x00000000ffffffff; + pci_cfg->queue_avail |= ((uint64_t)data << 32); + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_USED: + if (sz != 4) { + log_warnx("%s: unaligned write to queue used " + "register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_used &= 0xffffffff00000000; + pci_cfg->queue_used |= (uint64_t)data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_USED + 4: + if (sz != 4) { + log_warnx("%s: unaligned write to queue used " + "register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_used &= 0x00000000ffffffff; + pci_cfg->queue_used |= ((uint64_t)data << 32); + virtio_update_qa(dev); + break; + default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); } - pthread_rwlock_unlock(&lock); + if (pause_devices) { rx_enabled = 0; vionet_deassert_pic_irq(dev); @@ -1062,60 +1270,82 @@ handle_io_write(struct viodev_msg *msg, struct virtio_ } static uint32_t -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr) +vio1_read(struct virtio_dev *dev, struct viodev_msg *msg, int *deassert) { - struct vionet_dev *vionet = &dev->vionet; - uint32_t data; + uint32_t data = (uint32_t)(-1); + uint16_t reg = msg->reg; - pthread_rwlock_rdlock(&lock); - - switch (msg->reg) { - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: - data = vionet->mac[msg->reg - - VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI]; + switch (reg & 0xFF00) { + case VIO1_CFG_BAR_OFFSET: + data = vionet_cfg_read(dev, msg); break; - case VIRTIO_CONFIG_DEVICE_FEATURES: - data = vionet->cfg.device_feature; + case VIO1_DEV_BAR_OFFSET: + data = vionet_dev_read(dev, msg); break; - case VIRTIO_CONFIG_GUEST_FEATURES: - data = vionet->cfg.guest_feature; + case VIO1_NOTIFY_BAR_OFFSET: + /* Reads of notify register return all 1's. 
*/ break; - case VIRTIO_CONFIG_QUEUE_PFN: - data = vionet->cfg.queue_pfn; - break; - case VIRTIO_CONFIG_QUEUE_SIZE: - data = vionet->cfg.queue_size; - break; - case VIRTIO_CONFIG_QUEUE_SELECT: - data = vionet->cfg.queue_select; - break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - data = vionet->cfg.queue_notify; - break; - case VIRTIO_CONFIG_DEVICE_STATUS: - data = vionet->cfg.device_status; - break; - case VIRTIO_CONFIG_ISR_STATUS: - pthread_rwlock_unlock(&lock); + case VIO1_ISR_BAR_OFFSET: pthread_rwlock_wrlock(&lock); - data = vionet->cfg.isr_status; - vionet->cfg.isr_status = 0; - if (intr != NULL) - *intr = INTR_STATE_DEASSERT; + data = dev->isr; + dev->isr = 0; + *deassert = 1; + pthread_rwlock_unlock(&lock); break; default: - data = 0xFFFFFFFF; + log_debug("%s: no handler for reg 0x%04x", __func__, reg); } - pthread_rwlock_unlock(&lock); return (data); } +static void +vio1_write(struct virtio_dev *dev, struct viodev_msg *msg) +{ + uint16_t reg = msg->reg; + + switch (reg & 0xFF00) { + case VIO1_CFG_BAR_OFFSET: + (void)vionet_cfg_write(dev, msg); + break; + case VIO1_DEV_BAR_OFFSET: + /* Ignore all writes to device configuration registers. */ + break; + case VIO1_NOTIFY_BAR_OFFSET: + vionet_notifyq(dev, (uint16_t)(msg->data)); + break; + case VIO1_ISR_BAR_OFFSET: + /* ignore writes to ISR. */ + break; + default: + log_debug("%s: no handler for reg 0x%04x", __func__, reg); + } +} + +static uint32_t +vionet_dev_read(struct virtio_dev *dev, struct viodev_msg *msg) +{ + struct vionet_dev *vionet = (struct vionet_dev *)&dev->vionet; + uint32_t data = (uint32_t)(-1); + uint16_t reg = msg->reg & 0xFF; + + switch (reg) { + case VIRTIO_NET_CONFIG_MAC: + case VIRTIO_NET_CONFIG_MAC + 1: + case VIRTIO_NET_CONFIG_MAC + 2: + case VIRTIO_NET_CONFIG_MAC + 3: + case VIRTIO_NET_CONFIG_MAC + 4: + case VIRTIO_NET_CONFIG_MAC + 5: + data = (uint8_t)vionet->mac[reg - VIRTIO_NET_CONFIG_MAC]; + break; + default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); + return (uint32_t)(-1); + } + + return (data); +} + /* * Handle the rx side processing, communicating to the main thread via pipe. */ @@ -1220,7 +1450,6 @@ static void read_pipe_tx(int fd, short event, void *arg) { struct virtio_dev *dev = (struct virtio_dev*)arg; - struct vionet_dev *vionet = &dev->vionet; enum pipe_msg_type msg; int ret = 0; @@ -1260,12 +1489,12 @@ read_pipe_tx(int fd, short event, void *arg) pthread_rwlock_wrlock(&lock); if (ret == 1) { /* Notify the driver. */ - vionet->cfg.isr_status |= 1; + dev->isr |= 1; } else { /* Need a reset. Something went wrong. 
*/ log_warnx("%s: requesting device reset", __func__); - vionet->cfg.device_status |= DEVICE_NEEDS_RESET; - vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE; + dev->status |= DEVICE_NEEDS_RESET; + dev->isr |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE; } pthread_rwlock_unlock(&lock); @@ -1295,21 +1524,15 @@ read_pipe_main(int fd, short event, void *arg) if (resetting == 0) { log_debug("%s: resetting virtio network device %d", __func__, vionet->idx); - pthread_rwlock_wrlock(&lock); - vionet->cfg.device_status = 0; - vionet->cfg.guest_feature = 0; - vionet->cfg.queue_pfn = 0; - vionet_update_qa(vionet); - vionet->cfg.queue_size = 0; - vionet_update_qs(vionet); - vionet->cfg.queue_select = 0; - vionet->cfg.queue_notify = 0; - vionet->cfg.isr_status = 0; - vionet->vq[RXQ].last_avail = 0; - vionet->vq[RXQ].notified_avail = 0; - vionet->vq[TXQ].last_avail = 0; - vionet->vq[TXQ].notified_avail = 0; + dev->status = 0; + dev->cfg.guest_feature = 0; + dev->cfg.queue_pfn = 0; + dev->cfg.queue_select = 0; + dev->cfg.queue_notify = 0; + dev->isr = 0; + virtio_vq_init(dev, TXQ); + virtio_vq_init(dev, RXQ); pthread_rwlock_unlock(&lock); } break; @@ -1330,7 +1553,7 @@ vionet_assert_pic_irq(struct virtio_dev *dev) memset(&msg, 0, sizeof(msg)); msg.irq = dev->irq; - msg.vcpu = 0; // XXX + msg.vcpu = 0; /* XXX: smp */ msg.type = VIODEV_MSG_KICK; msg.state = INTR_STATE_ASSERT; @@ -1352,7 +1575,7 @@ vionet_deassert_pic_irq(struct virtio_dev *dev) memset(&msg, 0, sizeof(msg)); msg.irq = dev->irq; - msg.vcpu = 0; // XXX + msg.vcpu = 0; /* XXX: smp */ msg.type = VIODEV_MSG_KICK; msg.state = INTR_STATE_DEASSERT; blob - 95bc1f46dee702c630f98249195eacd45443b9b5 blob + 5bf14df65ddb9d92543a0b268dce874cdf13aa1c --- usr.sbin/vmd/vioscsi.c +++ usr.sbin/vmd/vioscsi.c @@ -32,6 +32,16 @@ #include "vioscsi.h" #include "virtio.h" +#define VIOSCSI_DEBUG 0 +#ifdef DPRINTF +#undef DPRINTF +#endif +#if VIOSCSI_DEBUG +#define DPRINTF log_debug +#else +#define DPRINTF(x...) 
do {} while(0) +#endif /* VIOSCSI_DEBUG */ + extern char *__progname; static void @@ -66,24 +76,24 @@ vioscsi_prepare_resp(struct virtio_scsi_res_hdr *resp, } static struct vring_desc* -vioscsi_next_ring_desc(struct vring_desc* desc, struct vring_desc* cur, - uint16_t *idx) +vioscsi_next_ring_desc(struct virtio_vq_info *vq_info, struct vring_desc* desc, + struct vring_desc* cur, uint16_t *idx) { - *idx = cur->next & VIOSCSI_QUEUE_MASK; + *idx = cur->next & vq_info->mask; return &desc[*idx]; } static void -vioscsi_next_ring_item(struct vioscsi_dev *dev, struct vring_avail *avail, - struct vring_used *used, struct vring_desc *desc, uint16_t idx) +vioscsi_next_ring_item(struct virtio_vq_info *vq_info, + struct vring_avail *avail, struct vring_used *used, struct vring_desc *desc, + uint16_t idx) { - used->ring[used->idx & VIOSCSI_QUEUE_MASK].id = idx; - used->ring[used->idx & VIOSCSI_QUEUE_MASK].len = desc->len; + used->ring[used->idx & vq_info->mask].id = idx; + used->ring[used->idx & vq_info->mask].len = desc->len; __sync_synchronize(); used->idx++; - dev->vq[dev->cfg.queue_notify].last_avail = - avail->idx & VIOSCSI_QUEUE_MASK; + vq_info->last_avail = avail->idx & vq_info->mask; } static const char * @@ -150,31 +160,25 @@ vioscsi_op_names(uint8_t type) } } +#if VIOSCSI_DEBUG static const char * vioscsi_reg_name(uint8_t reg) { switch (reg) { - case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; - case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; - case VIRTIO_CONFIG_QUEUE_PFN: return "queue pfn"; - case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; - case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; - case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; - case VIRTIO_CONFIG_DEVICE_STATUS: return "device status"; - case VIRTIO_CONFIG_ISR_STATUS: return "isr status"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: return "num_queues"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: return "seg_max"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "max_sectors"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "cmd_per_lun"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "event_info_size"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 20: return "sense_size"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 24: return "cdb_size"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 28: return "max_channel"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 30: return "max_target"; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 32: return "max_lun"; + case VIRTIO_SCSI_CONFIG_NUM_QUEUES: return "NUM_QUEUES"; + case VIRTIO_SCSI_CONFIG_SEG_MAX: return "SEG_MAX"; + case VIRTIO_SCSI_CONFIG_MAX_SECTORS: return "MAX_SECTORS"; + case VIRTIO_SCSI_CONFIG_CMD_PER_LUN: return "CMD_PER_LUN"; + case VIRTIO_SCSI_CONFIG_EVENT_INFO_SIZE: return "EVENT_INFO_SIZE"; + case VIRTIO_SCSI_CONFIG_SENSE_SIZE: return "SENSE_SIZE"; + case VIRTIO_SCSI_CONFIG_CDB_SIZE: return "CDB_SIZE"; + case VIRTIO_SCSI_CONFIG_MAX_CHANNEL: return "MAX_CHANNEL"; + case VIRTIO_SCSI_CONFIG_MAX_TARGET: return "MAX_TARGET"; + case VIRTIO_SCSI_CONFIG_MAX_LUN: return "MAX_LUN"; default: return "unknown"; } } +#endif /* VIOSCSI_DEBUG */ static void vioscsi_free_info(struct ioinfo *info) @@ -186,7 +190,7 @@ vioscsi_free_info(struct ioinfo *info) } static struct ioinfo * -vioscsi_start_read(struct vioscsi_dev *dev, off_t block, size_t n_blocks) +vioscsi_start_read(struct virtio_dev *dev, off_t block, size_t n_blocks) { struct ioinfo *info; @@ -214,10 +218,16 @@ nomem: } static const uint8_t * -vioscsi_finish_read(struct vioscsi_dev *dev, 
struct ioinfo *info) +vioscsi_finish_read(struct virtio_dev *dev, struct ioinfo *info) { - struct virtio_backing *f = &dev->file; + struct virtio_backing *f = NULL; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + + f = &vioscsi->file; if (f->pread(f->p, info->buf, info->len, info->offset) != info->len) { log_warn("vioscsi read error"); return NULL; @@ -227,16 +237,16 @@ vioscsi_finish_read(struct vioscsi_dev *dev, struct io } static int -vioscsi_handle_tur(struct vioscsi_dev *dev, struct virtio_scsi_req_hdr *req, - struct virtio_vq_acct *acct) +vioscsi_handle_tur(struct virtio_dev *dev, struct virtio_vq_info *vq_info, + struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; memset(&resp, 0, sizeof(resp)); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); @@ -245,9 +255,9 @@ vioscsi_handle_tur(struct vioscsi_dev *dev, struct vir __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -255,7 +265,7 @@ vioscsi_handle_tur(struct vioscsi_dev *dev, struct vir } static int -vioscsi_handle_inquiry(struct vioscsi_dev *dev, +vioscsi_handle_inquiry(struct virtio_dev *dev, struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) { int ret = 0; @@ -289,8 +299,8 @@ vioscsi_handle_inquiry(struct vioscsi_dev *dev, memcpy(inq_data->revision, INQUIRY_REVISION, INQUIRY_REVISION_LEN); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", __func__, acct->resp_desc->addr, @@ -303,8 +313,8 @@ vioscsi_handle_inquiry(struct vioscsi_dev *dev, } /* Move index for inquiry_data */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing inq_data to 0x%llx size %d at " "local idx %d req_idx %d global_idx %d", @@ -318,9 +328,9 @@ vioscsi_handle_inquiry(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -331,8 +341,9 @@ inq_out: } static int -vioscsi_handle_mode_sense(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_mode_sense(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -400,7 +411,7 @@ vioscsi_handle_mode_sense(struct vioscsi_dev *dev, } /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + 
acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d " @@ -417,7 +428,7 @@ vioscsi_handle_mode_sense(struct vioscsi_dev *dev, } /* Move index for mode_reply */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing mode_reply to 0x%llx " @@ -437,9 +448,9 @@ vioscsi_handle_mode_sense(struct vioscsi_dev *dev, free(mode_reply); ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } else { mode_sense_error: @@ -449,7 +460,7 @@ mode_sense_error: SENSE_ILLEGAL_CDB_FIELD, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -459,9 +470,9 @@ mode_sense_error: } ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } mode_sense_out: @@ -469,8 +480,9 @@ mode_sense_out: } static int -vioscsi_handle_mode_sense_big(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_mode_sense_big(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -538,7 +550,7 @@ vioscsi_handle_mode_sense_big(struct vioscsi_dev *dev, } /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d " @@ -555,7 +567,7 @@ vioscsi_handle_mode_sense_big(struct vioscsi_dev *dev, } /* Move index for mode_reply */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing mode_reply to 0x%llx " @@ -575,9 +587,9 @@ vioscsi_handle_mode_sense_big(struct vioscsi_dev *dev, free(mode_reply); ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } else { mode_sense_big_error: @@ -587,7 +599,7 @@ mode_sense_big_error: SENSE_ILLEGAL_CDB_FIELD, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -597,9 +609,9 @@ mode_sense_big_error: } ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } mode_sense_big_out: @@ -607,13 +619,19 @@ mode_sense_big_out: } static int -vioscsi_handle_read_capacity(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) 
+vioscsi_handle_read_capacity(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; struct scsi_read_cap_data *r_cap_data; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + #if DEBUG struct scsi_read_capacity *r_cap = (struct scsi_read_capacity *)(req->cdb); @@ -633,7 +651,7 @@ vioscsi_handle_read_capacity(struct vioscsi_dev *dev, } DPRINTF("%s: ISO has %lld bytes and %lld blocks", - __func__, dev->sz, dev->n_blocks); + __func__, vioscsi->sz, vioscsi->n_blocks); /* * determine if num blocks of iso image > UINT32_MAX * indicating to hosts that READ_CAPACITY_16 should * be called to retrieve the full size */ - if (dev->n_blocks >= UINT32_MAX) { _lto4b(UINT32_MAX, r_cap_data->addr); _lto4b(VIOSCSI_BLOCK_SIZE_CDROM, r_cap_data->length); log_warnx("%s: ISO sz %lld is bigger than " "UINT32_MAX %u, all data may not be read", - __func__, dev->sz, UINT32_MAX); + __func__, vioscsi->sz, UINT32_MAX); } else { - _lto4b(dev->n_blocks - 1, r_cap_data->addr); + _lto4b(vioscsi->n_blocks - 1, r_cap_data->addr); + _lto4b(VIOSCSI_BLOCK_SIZE_CDROM, r_cap_data->length); } /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", @@ -668,8 +686,8 @@ vioscsi_handle_read_capacity(struct vioscsi_dev *dev, } /* Move index for r_cap_data */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing r_cap_data to 0x%llx size %d at " "local idx %d req_idx %d global_idx %d", @@ -683,9 +701,9 @@ vioscsi_handle_read_capacity(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -696,13 +714,19 @@ read_capacity_out: } static int -vioscsi_handle_read_capacity_16(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_read_capacity_16(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; struct scsi_read_cap_data_16 *r_cap_data_16; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + #if DEBUG struct scsi_read_capacity_16 *r_cap_16 = (struct scsi_read_capacity_16 *)(req->cdb); @@ -722,14 +746,14 @@ vioscsi_handle_read_capacity_16(struct vioscsi_dev *de } DPRINTF("%s: ISO has %lld bytes and %lld blocks", __func__, - dev->sz, dev->n_blocks); + dev->vioscsi.sz, dev->vioscsi.n_blocks); - _lto8b(dev->n_blocks - 1, r_cap_data_16->addr); + _lto8b(vioscsi->n_blocks - 1, r_cap_data_16->addr); _lto4b(VIOSCSI_BLOCK_SIZE_CDROM, r_cap_data_16->length); /*
Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", @@ -743,8 +767,8 @@ vioscsi_handle_read_capacity_16(struct vioscsi_dev *de } /* Move index for r_cap_data_16 */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing r_cap_data_16 to 0x%llx size %d " "at local idx %d req_idx %d global_idx %d", @@ -758,9 +782,9 @@ vioscsi_handle_read_capacity_16(struct vioscsi_dev *de __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -771,8 +795,9 @@ read_capacity_16_out: } static int -vioscsi_handle_report_luns(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_report_luns(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -796,7 +821,7 @@ vioscsi_handle_report_luns(struct vioscsi_dev *dev, SENSE_ILLEGAL_CDB_FIELD, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -805,9 +830,9 @@ vioscsi_handle_report_luns(struct vioscsi_dev *dev, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } goto rpl_out; @@ -828,8 +853,8 @@ vioscsi_handle_report_luns(struct vioscsi_dev *dev, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", __func__, acct->resp_desc->addr, @@ -842,8 +867,8 @@ vioscsi_handle_report_luns(struct vioscsi_dev *dev, } /* Move index for reply_rpl */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing reply_rpl to 0x%llx size %d at " "local idx %d req_idx %d global_idx %d", @@ -857,9 +882,9 @@ vioscsi_handle_report_luns(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -870,8 +895,9 @@ rpl_out: } static int -vioscsi_handle_read_6(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_read_6(struct virtio_dev *dev, + struct 
virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -879,26 +905,32 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, uint32_t read_lba; struct ioinfo *info; struct scsi_rw *read_6; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + memset(&resp, 0, sizeof(resp)); read_6 = (struct scsi_rw *)(req->cdb); read_lba = ((read_6->addr[0] & SRW_TOPADDR) << 16 ) | (read_6->addr[1] << 8) | read_6->addr[2]; DPRINTF("%s: READ Addr 0x%08x Len %d (%d)", - __func__, read_lba, read_6->length, read_6->length * dev->max_xfer); + __func__, read_lba, read_6->length, + read_6->length * dev->vioscsi.max_xfer); /* check if lba is in range */ - if (read_lba > dev->n_blocks - 1) { + if (read_lba > vioscsi->n_blocks - 1) { DPRINTF("%s: requested block out of range req: %ud max: %lld", - __func__, read_lba, dev->n_blocks); + __func__, read_lba, vioscsi->n_blocks); vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_CHECK, SKEY_ILLEGAL_REQUEST, SENSE_LBA_OUT_OF_RANGE, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -907,9 +939,9 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } goto read_6_out; @@ -933,7 +965,7 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, SENSE_MEDIUM_NOT_PRESENT, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -942,9 +974,9 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -954,8 +986,8 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", @@ -969,8 +1001,8 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, } /* Move index for read_buf */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing read_buf to 0x%llx size %d at " "local idx %d req_idx %d global_idx %d", @@ -982,9 +1014,9 @@ vioscsi_handle_read_6(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + 
vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -995,8 +1027,9 @@ read_6_out: } static int -vioscsi_handle_read_10(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_read_10(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -1007,7 +1040,12 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, struct ioinfo *info; struct scsi_rw_10 *read_10; size_t chunk_len = 0; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + memset(&resp, 0, sizeof(resp)); read_10 = (struct scsi_rw_10 *)(req->cdb); read_lba = _4btol(read_10->addr); @@ -1015,19 +1053,19 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, chunk_offset = 0; DPRINTF("%s: READ_10 Addr 0x%08x Len %d (%d)", - __func__, read_lba, read_10_len, read_10_len * dev->max_xfer); + __func__, read_lba, read_10_len, read_10_len * vioscsi->max_xfer); /* check if lba is in range */ - if (read_lba > dev->n_blocks - 1) { + if (read_lba > vioscsi->n_blocks - 1) { DPRINTF("%s: requested block out of range req: %ud max: %lld", - __func__, read_lba, dev->n_blocks); + __func__, read_lba, vioscsi->n_blocks); vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_CHECK, SKEY_ILLEGAL_REQUEST, SENSE_LBA_OUT_OF_RANGE, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -1035,9 +1073,9 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1061,7 +1099,7 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, SENSE_MEDIUM_NOT_PRESENT, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -1069,9 +1107,9 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1081,8 +1119,8 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", @@ -1103,7 +1141,7 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, */ do { /* Move index for read_buf */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing read_buf to 
0x%llx size " @@ -1130,9 +1168,9 @@ vioscsi_handle_read_10(struct vioscsi_dev *dev, } while (chunk_offset < info->len); ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, acct->req_desc, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); free_read_10: @@ -1142,35 +1180,41 @@ read_10_out: } static int -vioscsi_handle_prevent_allow(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_prevent_allow(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; + struct vioscsi_dev *vioscsi = NULL; struct virtio_scsi_res_hdr resp; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + memset(&resp, 0, sizeof(resp)); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); - if (dev->locked) { + if (vioscsi->locked) { DPRINTF("%s: unlocking medium", __func__); } else { DPRINTF("%s: locking medium", __func__); } - dev->locked = dev->locked ? 0 : 1; + vioscsi->locked = vioscsi->locked ? 0 : 1; if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { log_warnx("%s: unable to write OK resp status data @ 0x%llx", __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1178,8 +1222,9 @@ vioscsi_handle_prevent_allow(struct vioscsi_dev *dev, } static int -vioscsi_handle_mechanism_status(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_mechanism_status(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -1200,7 +1245,7 @@ vioscsi_handle_mechanism_status(struct vioscsi_dev *de VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -1210,8 +1255,8 @@ vioscsi_handle_mechanism_status(struct vioscsi_dev *de } /* Move index for mech_status_header */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, mech_status_header, sizeof(struct scsi_mechanism_status_header))) { @@ -1221,9 +1266,9 @@ vioscsi_handle_mechanism_status(struct vioscsi_dev *de __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1234,8 +1279,9 @@ mech_out: } static int -vioscsi_handle_read_toc(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) 
+vioscsi_handle_read_toc(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -1243,7 +1289,12 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, uint8_t toc_data[TOC_DATA_SIZE]; uint8_t *toc_data_p; struct scsi_read_toc *toc = (struct scsi_read_toc *)(req->cdb); + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + DPRINTF("%s: %s - MSF %d Track 0x%02x Addr 0x%04x", __func__, vioscsi_op_names(toc->opcode), ((toc->byte2 >> 1) & 1), toc->from_track, _2btol(toc->data_len)); @@ -1261,7 +1312,7 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, SENSE_ILLEGAL_CDB_FIELD, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -1271,9 +1322,9 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, } ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); goto read_toc_out; @@ -1322,7 +1373,7 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, *toc_data_p++ = READ_TOC_LEAD_OUT_TRACK; *toc_data_p++ = 0x0; - _lto4b((uint32_t)dev->n_blocks, toc_data_p); + _lto4b((uint32_t)vioscsi->n_blocks, toc_data_p); toc_data_p += 4; toc_data_len = toc_data_p - toc_data; @@ -1332,8 +1383,8 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", @@ -1347,8 +1398,8 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, } /* Move index for toc descriptor */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing toc_data to 0x%llx size %d at " "local idx %d req_idx %d global_idx %d", @@ -1360,9 +1411,9 @@ vioscsi_handle_read_toc(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1371,8 +1422,9 @@ read_toc_out: } static int -vioscsi_handle_read_disc_info(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_read_disc_info(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -1387,7 +1439,7 @@ vioscsi_handle_read_disc_info(struct vioscsi_dev *dev, SENSE_ILLEGAL_CDB_FIELD, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if 
(write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -1395,9 +1447,9 @@ vioscsi_handle_read_disc_info(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1405,8 +1457,9 @@ vioscsi_handle_read_disc_info(struct vioscsi_dev *dev, } static int -vioscsi_handle_gesn(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_gesn(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -1414,7 +1467,12 @@ vioscsi_handle_gesn(struct vioscsi_dev *dev, struct scsi_gesn *gesn; struct scsi_gesn_event_header *gesn_event_header; struct scsi_gesn_power_event *gesn_power_event; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + memset(&resp, 0, sizeof(resp)); gesn = (struct scsi_gesn *)(req->cdb); DPRINTF("%s: GESN Method %s", __func__, @@ -1427,7 +1485,7 @@ vioscsi_handle_gesn(struct vioscsi_dev *dev, SENSE_ILLEGAL_CDB_FIELD, SENSE_DEFAULT_ASCQ); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); if (write_mem(acct->resp_desc->addr, &resp, sizeof(resp))) { @@ -1437,9 +1495,9 @@ vioscsi_handle_gesn(struct vioscsi_dev *dev, } ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); goto gesn_out; @@ -1454,14 +1512,14 @@ vioscsi_handle_gesn(struct vioscsi_dev *dev, /* set event descriptor */ gesn_power_event->event_code = GESN_CODE_NOCHG; - if (dev->locked) + if (vioscsi->locked) gesn_power_event->status = GESN_STATUS_ACTIVE; else gesn_power_event->status = GESN_STATUS_IDLE; /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->req_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " "idx %d req_idx %d global_idx %d", @@ -1475,8 +1533,8 @@ vioscsi_handle_gesn(struct vioscsi_dev *dev, } /* Move index for gesn_reply */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing gesn_reply to 0x%llx size %d at " "local idx %d req_idx %d global_idx %d", @@ -1489,9 +1547,9 @@ vioscsi_handle_gesn(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1500,8 +1558,9 @@ gesn_out: } static int -vioscsi_handle_get_config(struct vioscsi_dev *dev, - struct virtio_scsi_req_hdr *req, struct virtio_vq_acct *acct) +vioscsi_handle_get_config(struct virtio_dev *dev, + struct virtio_vq_info *vq_info, struct virtio_scsi_req_hdr *req, + struct 
virtio_vq_acct *acct) { int ret = 0; struct virtio_scsi_res_hdr resp; @@ -1513,6 +1572,7 @@ vioscsi_handle_get_config(struct vioscsi_dev *dev, struct scsi_config_morphing_descriptor *config_morphing_desc; struct scsi_config_remove_media_descriptor *config_remove_media_desc; struct scsi_config_random_read_descriptor *config_random_read_desc; + struct vioscsi_dev *vioscsi = NULL; #if DEBUG struct scsi_get_configuration *get_configuration = @@ -1522,6 +1582,10 @@ vioscsi_handle_get_config(struct vioscsi_dev *dev, _2btol(get_configuration->length)); #endif /* DEBUG */ + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + get_conf_reply = (uint8_t*)calloc(G_CONFIG_REPLY_SIZE, sizeof(uint8_t)); if (get_conf_reply == NULL) @@ -1592,10 +1656,11 @@ vioscsi_handle_get_config(struct vioscsi_dev *dev, config_random_read_desc->feature_code); config_random_read_desc->byte3 = CONFIG_RANDOM_READ_BYTE3; config_random_read_desc->length = CONFIG_RANDOM_READ_LENGTH; - if (dev->n_blocks >= UINT32_MAX) + if (vioscsi->n_blocks >= UINT32_MAX) _lto4b(UINT32_MAX, config_random_read_desc->block_size); else - _lto4b(dev->n_blocks - 1, config_random_read_desc->block_size); + _lto4b(vioscsi->n_blocks - 1, + config_random_read_desc->block_size); _lto2b(CONFIG_RANDOM_READ_BLOCKING_TYPE, config_random_read_desc->blocking_type); @@ -1603,7 +1668,7 @@ vioscsi_handle_get_config(struct vioscsi_dev *dev, vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_OK, SCSI_OK, 0, 0, 0); /* Move index for response */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, acct->req_desc, &(acct->resp_idx)); DPRINTF("%s: writing resp to 0x%llx size %d at local " @@ -1618,8 +1683,8 @@ vioscsi_handle_get_config(struct vioscsi_dev *dev, } /* Move index for get_conf_reply */ - acct->resp_desc = vioscsi_next_ring_desc(acct->desc, acct->resp_desc, - &(acct->resp_idx)); + acct->resp_desc = vioscsi_next_ring_desc(vq_info, acct->desc, + acct->resp_desc, &(acct->resp_idx)); DPRINTF("%s: writing get_conf_reply to 0x%llx size %d " "at local idx %d req_idx %d global_idx %d", @@ -1633,9 +1698,9 @@ vioscsi_handle_get_config(struct vioscsi_dev *dev, __func__, acct->resp_desc->addr); } else { ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct->avail, acct->used, + vioscsi_next_ring_item(vq_info, acct->avail, acct->used, acct->req_desc, acct->req_idx); } @@ -1649,421 +1714,111 @@ int vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, void *cookie, uint8_t sz) { - struct vioscsi_dev *dev = (struct vioscsi_dev *)cookie; + struct virtio_dev *dev = (struct virtio_dev *)cookie; + struct vioscsi_dev *vioscsi = NULL; + if (dev->device_id != PCI_PRODUCT_VIRTIO_SCSI) + fatalx("%s: virtio device is not a scsi device", __func__); + vioscsi = &dev->vioscsi; + *intr = 0xFF; - DPRINTF("%s: request %s reg %u, %s sz %u", __func__, - dir ? "READ" : "WRITE", reg, vioscsi_reg_name(reg), sz); + DPRINTF("%s: request %s reg %s sz %u", __func__, + dir ? 
"READ" : "WRITE", vioscsi_reg_name(reg), sz); - if (dir == 0) { + if (dir == VEI_DIR_OUT) { switch (reg) { - case VIRTIO_CONFIG_DEVICE_FEATURES: - case VIRTIO_CONFIG_QUEUE_SIZE: - case VIRTIO_CONFIG_ISR_STATUS: - log_warnx("%s: illegal write %x to %s", - __progname, *data, vioscsi_reg_name(reg)); + case VIRTIO_SCSI_CONFIG_SENSE_SIZE: + /* XXX support writing to sense size register */ + if (*data != VIOSCSI_SENSE_LEN) + log_warnx("%s: guest write to sense size " + "register ignored", __func__); break; - case VIRTIO_CONFIG_GUEST_FEATURES: - dev->cfg.guest_feature = *data; - DPRINTF("%s: guest feature set to %u", - __func__, dev->cfg.guest_feature); + case VIRTIO_SCSI_CONFIG_CDB_SIZE: + /* XXX support writing CDB size. */ + if (*data != VIOSCSI_CDB_LEN) + log_warnx("%s: guest write to cdb size " + "register ignored", __func__); break; - case VIRTIO_CONFIG_QUEUE_PFN: - dev->cfg.queue_pfn = *data; - vioscsi_update_qa(dev); - break; - case VIRTIO_CONFIG_QUEUE_SELECT: - dev->cfg.queue_select = *data; - vioscsi_update_qs(dev); - break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - dev->cfg.queue_notify = *data; - if (vioscsi_notifyq(dev)) - *intr = 1; - break; - case VIRTIO_CONFIG_DEVICE_STATUS: - dev->cfg.device_status = *data; - DPRINTF("%s: device status set to %u", - __func__, dev->cfg.device_status); - if (dev->cfg.device_status == 0) { - log_debug("%s: device reset", __func__); - dev->cfg.guest_feature = 0; - dev->cfg.queue_pfn = 0; - vioscsi_update_qa(dev); - dev->cfg.queue_size = 0; - vioscsi_update_qs(dev); - dev->cfg.queue_select = 0; - dev->cfg.queue_notify = 0; - dev->cfg.isr_status = 0; - dev->vq[0].last_avail = 0; - dev->vq[1].last_avail = 0; - dev->vq[2].last_avail = 0; - } - break; default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); break; } } else { switch (reg) { - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: - /* VIRTIO_SCSI_CONFIG_NUM_QUEUES, 32bit */ - if (sz == 4) - *data = (uint32_t)VIOSCSI_NUM_QUEUES; - else if (sz == 1) { - /* read first byte of num_queues */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_NUM_QUEUES) & 0xFF; - } + case VIRTIO_SCSI_CONFIG_NUM_QUEUES: + /* Number of request queues, not number of all queues */ + if (sz == sizeof(uint32_t)) + *data = (uint32_t)(VIRTIO_SCSI_QUEUES); + else + log_warnx("%s: unaligned read of num queues " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: - if (sz == 1) { - /* read second byte of num_queues */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_NUM_QUEUES >> 8) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: - if (sz == 1) { - /* read third byte of num_queues */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_NUM_QUEUES >> 16) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: - if (sz == 1) { - /* read fourth byte of num_queues */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_NUM_QUEUES >> 24) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: - /* VIRTIO_SCSI_CONFIG_SEG_MAX, 32bit */ - if (sz == 4) + case VIRTIO_SCSI_CONFIG_SEG_MAX: + if (sz == sizeof(uint32_t)) *data = (uint32_t)(VIOSCSI_SEG_MAX); - else if (sz == 1) { - /* read first byte of seg_max */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_SEG_MAX) & 0xFF; - } + else + log_warnx("%s: unaligned read of seg max " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: - if (sz == 1) { - /* read second byte of seg_max */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_SEG_MAX >> 8) & 0xFF; - } + case 
VIRTIO_SCSI_CONFIG_MAX_SECTORS: + if (sz == sizeof(uint32_t)) + *data = (uint32_t)(vioscsi->max_xfer); + else + log_warnx("%s: unaligned read of max sectors " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6: - if (sz == 1) { - /* read third byte of seg_max */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_SEG_MAX >> 16) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7: - if (sz == 1) { - /* read fourth byte of seg_max */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_SEG_MAX >> 24) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: - /* VIRTIO_SCSI_CONFIG_MAX_SECTORS, 32bit */ - if (sz == 4) - *data = (uint32_t)(dev->max_xfer); - else if (sz == 1) { - /* read first byte of max_xfer */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(dev->max_xfer) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9: - if (sz == 1) { - /* read second byte of max_xfer */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(dev->max_xfer >> 8) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10: - if (sz == 1) { - /* read third byte of max_xfer */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(dev->max_xfer >> 16) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11: - if (sz == 1) { - /* read fourth byte of max_xfer */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(dev->max_xfer >> 24) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: - /* VIRTIO_SCSI_CONFIG_CMD_PER_LUN, 32bit */ - if (sz == 4) + case VIRTIO_SCSI_CONFIG_CMD_PER_LUN: + if (sz == sizeof(uint32_t)) *data = (uint32_t)(VIOSCSI_CMD_PER_LUN); - else if (sz == 1) { - /* read first byte of cmd_per_lun */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_CMD_PER_LUN) & 0xFF; - } + else + log_warnx("%s: unaligned read of cmd per lun " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 13: - if (sz == 1) { - /* read second byte of cmd_per_lun */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_CMD_PER_LUN >> 8) & 0xFF; - } + case VIRTIO_SCSI_CONFIG_EVENT_INFO_SIZE: + *data = 0; break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 14: - if (sz == 1) { - /* read third byte of cmd_per_lun */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_CMD_PER_LUN >> 16) - & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 15: - if (sz == 1) { - /* read fourth byte of cmd_per_lun */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_CMD_PER_LUN >> 24) - & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: - /* VIRTIO_SCSI_CONFIG_EVENT_INFO_SIZE, 32bit */ - *data = 0x00; - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 20: - /* VIRTIO_SCSI_CONFIG_SENSE_SIZE, 32bit */ - if (sz == 4) + case VIRTIO_SCSI_CONFIG_SENSE_SIZE: + if (sz == sizeof(uint32_t)) *data = (uint32_t)(VIOSCSI_SENSE_LEN); - else if (sz == 1) { - /* read first byte of sense_size */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_SENSE_LEN) & 0xFF; - } + else + log_warnx("%s: unaligned read of sense size " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 21: - if (sz == 1) { - /* read second byte of sense_size */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_SENSE_LEN >> 8) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 22: - if (sz == 1) { - /* read third byte of sense_size */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_SENSE_LEN >> 16) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 23: - if (sz == 1) { - /* read fourth byte of 
sense_size */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_SENSE_LEN >> 24) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 24: - /* VIRTIO_SCSI_CONFIG_CDB_SIZE, 32bit */ - if (sz == 4) + case VIRTIO_SCSI_CONFIG_CDB_SIZE: + if (sz == sizeof(uint32_t)) *data = (uint32_t)(VIOSCSI_CDB_LEN); - else if (sz == 1) { - /* read first byte of cdb_len */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_CDB_LEN) & 0xFF; - } + else + log_warnx("%s: unaligned read of cdb size " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 25: - if (sz == 1) { - /* read second byte of cdb_len */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_CDB_LEN >> 8) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 26: - if (sz == 1) { - /* read third byte of cdb_len */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_CDB_LEN >> 16) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 27: - if (sz == 1) { - /* read fourth byte of cdb_len */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_CDB_LEN >> 24) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 28: - /* VIRTIO_SCSI_CONFIG_MAX_CHANNEL, 16bit */ - + case VIRTIO_SCSI_CONFIG_MAX_CHANNEL: /* defined by standard to be zero */ - *data &= 0xFFFF0000; + *data = 0; break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 29: - /* defined by standard to be zero */ - *data &= 0xFFFF0000; + case VIRTIO_SCSI_CONFIG_MAX_TARGET: + if (sz == sizeof(uint16_t)) + *data = (uint32_t)(VIOSCSI_MAX_TARGET); + else + log_warnx("%s: unaligned read of max target " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 30: - /* VIRTIO_SCSI_CONFIG_MAX_TARGET, 16bit */ - if (sz == 2) { - *data &= 0xFFFF0000; - *data |= - (uint32_t)(VIOSCSI_MAX_TARGET) & 0xFFFF; - } else if (sz == 1) { - /* read first byte of max_target */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_MAX_TARGET) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 31: - if (sz == 1) { - /* read second byte of max_target */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_MAX_TARGET >> 8) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 32: - /* VIRTIO_SCSI_CONFIG_MAX_LUN, 32bit */ - if (sz == 4) + case VIRTIO_SCSI_CONFIG_MAX_LUN: + if (sz == sizeof(uint32_t)) *data = (uint32_t)(VIOSCSI_MAX_LUN); - else if (sz == 1) { - /* read first byte of max_lun */ - *data &= 0xFFFFFF00; - *data |= (uint32_t)(VIOSCSI_MAX_LUN) & 0xFF; - } + else + log_warnx("%s: unaligned read of max lun " + "register", __func__); break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 33: - if (sz == 1) { - /* read second byte of max_lun */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_MAX_LUN >> 8) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 34: - if (sz == 1) { - /* read third byte of max_lun */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_MAX_LUN >> 16) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 35: - if (sz == 1) { - /* read fourth byte of max_lun */ - *data &= 0xFFFFFF00; - *data |= - (uint32_t)(VIOSCSI_MAX_LUN >> 24) & 0xFF; - } - break; - case VIRTIO_CONFIG_DEVICE_FEATURES: - *data = dev->cfg.device_feature; - break; - case VIRTIO_CONFIG_GUEST_FEATURES: - *data = dev->cfg.guest_feature; - break; - case VIRTIO_CONFIG_QUEUE_PFN: - *data = dev->cfg.queue_pfn; - break; - case VIRTIO_CONFIG_QUEUE_SIZE: - if (sz == 4) - *data = dev->cfg.queue_size; - else if (sz == 2) { - *data &= 0xFFFF0000; - *data |= (uint16_t)dev->cfg.queue_size; - } else 
if (sz == 1) { - *data &= 0xFFFFFF00; - *data |= (uint8_t)dev->cfg.queue_size; - } - break; - case VIRTIO_CONFIG_QUEUE_SELECT: - *data = dev->cfg.queue_select; - break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - *data = dev->cfg.queue_notify; - break; - case VIRTIO_CONFIG_DEVICE_STATUS: - if (sz == 4) - *data = dev->cfg.device_status; - else if (sz == 2) { - *data &= 0xFFFF0000; - *data |= (uint16_t)dev->cfg.device_status; - } else if (sz == 1) { - *data &= 0xFFFFFF00; - *data |= (uint8_t)dev->cfg.device_status; - } - break; - case VIRTIO_CONFIG_ISR_STATUS: - *data = dev->cfg.isr_status; - dev->cfg.isr_status = 0; - break; + default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); + *data = (uint32_t)(-1); } } - return (0); } -void -vioscsi_update_qs(struct vioscsi_dev *dev) -{ - struct virtio_vq_info *vq_info; - - /* Invalid queue? */ - if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) { - dev->cfg.queue_size = 0; - return; - } - - vq_info = &dev->vq[dev->cfg.queue_select]; - - /* Update queue pfn/size based on queue select */ - dev->cfg.queue_pfn = vq_info->q_gpa >> 12; - dev->cfg.queue_size = vq_info->qs; -} - -void -vioscsi_update_qa(struct vioscsi_dev *dev) -{ - struct virtio_vq_info *vq_info; - void *hva = NULL; - - /* Invalid queue? */ - if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) - return; - - vq_info = &dev->vq[dev->cfg.queue_select]; - vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; - - hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOSCSI_QUEUE_SIZE)); - if (hva == NULL) - fatal("vioscsi_update_qa"); - vq_info->q_hva = hva; -} - /* * Process message(s) in the queue(s) * vioscsi driver will be placing the following in the queue for each iteration @@ -2075,22 +1830,17 @@ vioscsi_update_qa(struct vioscsi_dev *dev) * 0 otherwise */ int -vioscsi_notifyq(struct vioscsi_dev *dev) +vioscsi_notifyq(struct virtio_dev *dev, uint16_t vq_idx) { - int cnt, ret = 0; + size_t cnt; + int ret = 0; char *vr; struct virtio_scsi_req_hdr req; struct virtio_scsi_res_hdr resp; struct virtio_vq_acct acct; struct virtio_vq_info *vq_info; - ret = 0; - - /* Invalid queue? 
*/ - if (dev->cfg.queue_notify >= VIRTIO_MAX_QUEUES) - return (ret); - - vq_info = &dev->vq[dev->cfg.queue_notify]; + vq_info = &dev->vq[vq_idx]; vr = vq_info->q_hva; if (vr == NULL) fatalx("%s: null vring", __func__); @@ -2100,23 +1850,23 @@ vioscsi_notifyq(struct vioscsi_dev *dev) acct.avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); acct.used = (struct vring_used *)(vr + vq_info->vq_usedoffset); - acct.idx = vq_info->last_avail & VIOSCSI_QUEUE_MASK; + acct.idx = vq_info->last_avail & vq_info->mask; - if ((acct.avail->idx & VIOSCSI_QUEUE_MASK) == acct.idx) { + if ((acct.avail->idx & vq_info->mask) == acct.idx) { log_debug("%s - nothing to do?", __func__); return (0); } cnt = 0; - while (acct.idx != (acct.avail->idx & VIOSCSI_QUEUE_MASK)) { + while (acct.idx != (acct.avail->idx & vq_info->mask)) { /* Guard against infinite descriptor chains */ - if (++cnt >= VIOSCSI_QUEUE_SIZE) { + if (++cnt >= vq_info->qs) { log_warnx("%s: invalid descriptor table", __func__); goto out; } - acct.req_idx = acct.avail->ring[acct.idx] & VIOSCSI_QUEUE_MASK; + acct.req_idx = acct.avail->ring[acct.idx] & vq_info->mask; acct.req_desc = &(acct.desc[acct.req_idx]); /* Clear resp for next message */ @@ -2155,8 +1905,8 @@ vioscsi_notifyq(struct vioscsi_dev *dev) __func__, req.cdb[0], vioscsi_op_names(req.cdb[0]), req.lun[0], req.lun[1], req.lun[2], req.lun[3]); /* Move index for response */ - acct.resp_desc = vioscsi_next_ring_desc(acct.desc, - acct.req_desc, &(acct.resp_idx)); + acct.resp_desc = vioscsi_next_ring_desc(vq_info, + acct.desc, acct.req_desc, &(acct.resp_idx)); vioscsi_prepare_resp(&resp, VIRTIO_SCSI_S_BAD_TARGET, SCSI_OK, 0, 0, 0); @@ -2175,79 +1925,89 @@ vioscsi_notifyq(struct vioscsi_dev *dev) } ret = 1; - dev->cfg.isr_status = 1; + dev->isr = 1; /* Move ring indexes (updates the used ring index) */ - vioscsi_next_ring_item(dev, acct.avail, acct.used, + vioscsi_next_ring_item(vq_info, acct.avail, acct.used, acct.req_desc, acct.req_idx); goto next_msg; } DPRINTF("%s: Queue %d id 0x%llx lun %u:%u:%u:%u" " cdb OP 0x%02x,%s", - __func__, dev->cfg.queue_notify, req.id, - req.lun[0], req.lun[1], req.lun[2], req.lun[3], - req.cdb[0], vioscsi_op_names(req.cdb[0])); + __func__, vq_idx, req.id, req.lun[0], req.lun[1], + req.lun[2], req.lun[3], req.cdb[0], + vioscsi_op_names(req.cdb[0])); /* opcode is first byte */ switch (req.cdb[0]) { case TEST_UNIT_READY: case START_STOP: - ret = vioscsi_handle_tur(dev, &req, &acct); + ret = vioscsi_handle_tur(dev, vq_info, &req, &acct); break; case PREVENT_ALLOW: - ret = vioscsi_handle_prevent_allow(dev, &req, &acct); + ret = vioscsi_handle_prevent_allow(dev, vq_info, &req, + &acct); break; case READ_TOC: - ret = vioscsi_handle_read_toc(dev, &req, &acct); + ret = vioscsi_handle_read_toc(dev, vq_info, &req, + &acct); break; case READ_CAPACITY: - ret = vioscsi_handle_read_capacity(dev, &req, &acct); + ret = vioscsi_handle_read_capacity(dev, vq_info, &req, + &acct); break; case READ_CAPACITY_16: - ret = vioscsi_handle_read_capacity_16(dev, &req, &acct); + ret = vioscsi_handle_read_capacity_16(dev, vq_info, + &req, &acct); break; case READ_COMMAND: - ret = vioscsi_handle_read_6(dev, &req, &acct); + ret = vioscsi_handle_read_6(dev, vq_info, &req, &acct); break; case READ_10: - ret = vioscsi_handle_read_10(dev, &req, &acct); + ret = vioscsi_handle_read_10(dev, vq_info, &req, &acct); break; case INQUIRY: - ret = vioscsi_handle_inquiry(dev, &req, &acct); + ret = vioscsi_handle_inquiry(dev, vq_info, &req, &acct); break; case MODE_SENSE: - ret =
vioscsi_handle_mode_sense(dev, &req, &acct); + ret = vioscsi_handle_mode_sense(dev, vq_info, &req, + &acct); break; case MODE_SENSE_BIG: - ret = vioscsi_handle_mode_sense_big(dev, &req, &acct); + ret = vioscsi_handle_mode_sense_big(dev, vq_info, &req, + &acct); break; case GET_EVENT_STATUS_NOTIFICATION: - ret = vioscsi_handle_gesn(dev, &req, &acct); + ret = vioscsi_handle_gesn(dev, vq_info, &req, &acct); break; case READ_DISC_INFORMATION: - ret = vioscsi_handle_read_disc_info(dev, &req, &acct); + ret = vioscsi_handle_read_disc_info(dev, vq_info, &req, + &acct); break; case GET_CONFIGURATION: - ret = vioscsi_handle_get_config(dev, &req, &acct); + ret = vioscsi_handle_get_config(dev, vq_info, &req, + &acct); break; case MECHANISM_STATUS: - ret = vioscsi_handle_mechanism_status(dev, &req, &acct); + ret = vioscsi_handle_mechanism_status(dev, vq_info, + &req, &acct); break; case REPORT_LUNS: - ret = vioscsi_handle_report_luns(dev, &req, &acct); + ret = vioscsi_handle_report_luns(dev, vq_info, &req, + &acct); break; default: log_warnx("%s: unsupported opcode 0x%02x,%s", __func__, req.cdb[0], vioscsi_op_names(req.cdb[0])); /* Move ring indexes */ - vioscsi_next_ring_item(dev, acct.avail, acct.used, + vioscsi_next_ring_item(vq_info, acct.avail, acct.used, acct.req_desc, acct.req_idx); break; } next_msg: /* Increment to the next queue slot */ - acct.idx = (acct.idx + 1) & VIOSCSI_QUEUE_MASK; + acct.idx = (acct.idx + 1) & vq_info->mask; } out: return (ret); blob - 561f287fcef819ccf99203916caf5c1e87960925 blob + 99905bae1f501a28bdc9a5ab5f74c5cf71109c66 --- usr.sbin/vmd/virtio.c +++ usr.sbin/vmd/virtio.c @@ -43,12 +43,22 @@ #include "virtio.h" #include "vmd.h" +#define VIRTIO_DEBUG 0 +#ifdef DPRINTF +#undef DPRINTF +#endif +#if VIRTIO_DEBUG +#define DPRINTF log_debug +#else +#define DPRINTF(x...) do {} while(0) +#endif /* VIRTIO_DEBUG */ + extern struct vmd *env; extern char *__progname; -struct viornd_dev viornd; -struct vioscsi_dev *vioscsi; -struct vmmci_dev vmmci; +struct virtio_dev viornd; +struct virtio_dev *vioscsi = NULL; +struct virtio_dev vmmci; /* Devices emulated in subprocesses are inserted into this list. 
*/ SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs; @@ -64,12 +74,53 @@ SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs; #define RXQ 0 #define TXQ 1 +static void virtio_dev_init(struct virtio_dev *, uint8_t, uint16_t, uint16_t, + uint64_t, uint32_t); static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *); static void virtio_dispatch_dev(int, short, void *); static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *); static int virtio_dev_closefds(struct virtio_dev *); +static void virtio_pci_add_cap(uint8_t, uint8_t, uint8_t, uint32_t); static void vmmci_pipe_dispatch(int, short, void *); +static int virtio_io_dispatch(int, uint16_t, uint32_t *, uint8_t *, void *, + uint8_t); +static int virtio_io_isr(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); +static int virtio_io_notify(int, uint16_t, uint32_t *, uint8_t *, void *, + uint8_t); +static int viornd_notifyq(struct virtio_dev *, uint16_t); + +static void vmmci_ack(struct virtio_dev *, unsigned int); + +#if VIRTIO_DEBUG +static const char * +virtio1_reg_name(uint16_t reg) +{ + switch (reg) { + case VIO1_PCI_DEVICE_FEATURE_SELECT: return "DEVICE_FEATURE_SELECT"; + case VIO1_PCI_DEVICE_FEATURE: return "DEVICE_FEATURE"; + case VIO1_PCI_DRIVER_FEATURE_SELECT: return "DRIVER_FEATURE_SELECT"; + case VIO1_PCI_DRIVER_FEATURE: return "DRIVER_FEATURE"; + case VIO1_PCI_CONFIG_MSIX_VECTOR: return "CONFIG_MSIX_VECTOR"; + case VIO1_PCI_NUM_QUEUES: return "NUM_QUEUES"; + case VIO1_PCI_DEVICE_STATUS: return "DEVICE_STATUS"; + case VIO1_PCI_CONFIG_GENERATION: return "CONFIG_GENERATION"; + case VIO1_PCI_QUEUE_SELECT: return "QUEUE_SELECT"; + case VIO1_PCI_QUEUE_SIZE: return "QUEUE_SIZE"; + case VIO1_PCI_QUEUE_MSIX_VECTOR: return "QUEUE_MSIX_VECTOR"; + case VIO1_PCI_QUEUE_ENABLE: return "QUEUE_ENABLE"; + case VIO1_PCI_QUEUE_NOTIFY_OFF: return "QUEUE_NOTIFY_OFF"; + case VIO1_PCI_QUEUE_DESC: return "QUEUE_DESC"; + case VIO1_PCI_QUEUE_DESC + 4: return "QUEUE_DESC (HIGH)"; + case VIO1_PCI_QUEUE_AVAIL: return "QUEUE_AVAIL"; + case VIO1_PCI_QUEUE_AVAIL + 4: return "QUEUE_AVAIL (HIGH)"; + case VIO1_PCI_QUEUE_USED: return "QUEUE_USED"; + case VIO1_PCI_QUEUE_USED + 4: return "QUEUE_USED (HIGH)"; + default: return "UNKNOWN"; + } +} +#endif /* VIRTIO_DEBUG */ + const char * virtio_reg_name(uint8_t reg) { @@ -111,62 +162,125 @@ vring_size(uint32_t vq_size) /* Update queue select */ void -viornd_update_qs(void) +virtio_update_qs(struct virtio_dev *dev) { - struct virtio_vq_info *vq_info; + struct virtio_vq_info *vq_info = NULL; - /* Invalid queue? */ - if (viornd.cfg.queue_select > 0) { - viornd.cfg.queue_size = 0; - return; + if (dev->driver_feature & VIRTIO_F_VERSION_1) { + /* Invalid queue */ + if (dev->pci_cfg.queue_select >= dev->num_queues) { + dev->pci_cfg.queue_size = 0; + dev->pci_cfg.queue_enable = 0; + return; + } + vq_info = &dev->vq[dev->pci_cfg.queue_select]; + dev->pci_cfg.queue_size = vq_info->qs; + dev->pci_cfg.queue_desc = vq_info->q_gpa; + dev->pci_cfg.queue_avail = vq_info->q_gpa + vq_info->vq_availoffset; + dev->pci_cfg.queue_used = vq_info->q_gpa + vq_info->vq_usedoffset; + dev->pci_cfg.queue_enable = vq_info->vq_enabled; + } else { + /* Invalid queue? 
*/ + if (dev->cfg.queue_select >= dev->num_queues) { + dev->cfg.queue_size = 0; + return; + } + vq_info = &dev->vq[dev->cfg.queue_select]; + dev->cfg.queue_pfn = vq_info->q_gpa >> 12; + dev->cfg.queue_size = vq_info->qs; } - - vq_info = &viornd.vq[viornd.cfg.queue_select]; - - /* Update queue pfn/size based on queue select */ - viornd.cfg.queue_pfn = vq_info->q_gpa >> 12; - viornd.cfg.queue_size = vq_info->qs; } -/* Update queue address */ +/* Update queue address. */ void -viornd_update_qa(void) +virtio_update_qa(struct virtio_dev *dev) { - struct virtio_vq_info *vq_info; + struct virtio_vq_info *vq_info = NULL; void *hva = NULL; - /* Invalid queue? */ - if (viornd.cfg.queue_select > 0) - return; + if (dev->driver_feature & VIRTIO_F_VERSION_1) { + if (dev->pci_cfg.queue_select >= dev->num_queues) { + log_warnx("%s: invalid queue index", __func__); + return; + } + vq_info = &dev->vq[dev->pci_cfg.queue_select]; + vq_info->q_gpa = dev->pci_cfg.queue_desc; - vq_info = &viornd.vq[viornd.cfg.queue_select]; - vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE; + /* + * Queue size is adjustable by the guest in Virtio 1.x. + * We validate the max size at time of write and not here. + */ + vq_info->qs = dev->pci_cfg.queue_size; + vq_info->mask = vq_info->qs - 1; - hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE)); - if (hva == NULL) - fatalx("viornd_update_qa"); - vq_info->q_hva = hva; + if (vq_info->qs > 0 && (vq_info->qs & (vq_info->qs - 1)) == 0) { + vq_info->vq_availoffset = dev->pci_cfg.queue_avail - + dev->pci_cfg.queue_desc; + vq_info->vq_usedoffset = dev->pci_cfg.queue_used - + dev->pci_cfg.queue_desc; + vq_info->vq_enabled = (dev->pci_cfg.queue_enable == 1); + } else { + vq_info->vq_availoffset = 0; + vq_info->vq_usedoffset = 0; + vq_info->vq_enabled = 0; + } + } else { + /* Invalid queue? */ + if (dev->cfg.queue_select >= dev->num_queues) { + log_warnx("%s: invalid queue index", __func__); + return; + } + vq_info = &dev->vq[dev->cfg.queue_select]; + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * + VIRTIO_PAGE_SIZE; + + /* Queue size is immutable in Virtio 0.9. */ + vq_info->vq_availoffset = sizeof(struct vring_desc) * + vq_info->qs; + vq_info->vq_usedoffset = VIRTQUEUE_ALIGN( + sizeof(struct vring_desc) * vq_info->qs + + sizeof(uint16_t) * (2 + vq_info->qs)); + } + + /* Update any host va mappings. */ + if (vq_info->q_gpa > 0) { + hva = hvaddr_mem(vq_info->q_gpa, vring_size(vq_info->qs)); + if (hva == NULL) + fatalx("%s: failed to translate gpa to hva", __func__); + vq_info->q_hva = hva; + } else { + vq_info->q_hva = NULL; + vq_info->last_avail = 0; + vq_info->notified_avail = 0; + } } -int -viornd_notifyq(void) +static int +viornd_notifyq(struct virtio_dev *dev, uint16_t idx) { size_t sz; - int dxx, ret; + int dxx, ret = 0; uint16_t aidx, uidx; char *vr, *rnd_data; - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; - struct virtio_vq_info *vq_info; + struct vring_desc *desc = NULL; + struct vring_avail *avail = NULL; + struct vring_used *used = NULL; + struct virtio_vq_info *vq_info = NULL; - ret = 0; + if (dev->device_id != PCI_PRODUCT_VIRTIO_ENTROPY) + fatalx("%s: device is not an entropy device", __func__); - /* Invalid queue?
*/ - if (viornd.cfg.queue_notify > 0) + if (idx >= dev->num_queues) { + log_warnx("%s: invalid virtqueue index", __func__); return (0); + } + vq_info = &dev->vq[idx]; - vq_info = &viornd.vq[viornd.cfg.queue_notify]; + if (!vq_info->vq_enabled) { + log_warnx("%s: virtqueue not enabled", __func__); + return (0); + } + vr = vq_info->q_hva; if (vr == NULL) fatalx("%s: null vring", __func__); @@ -175,107 +289,425 @@ viornd_notifyq(void) avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); used = (struct vring_used *)(vr + vq_info->vq_usedoffset); - aidx = avail->idx & VIORND_QUEUE_MASK; - uidx = used->idx & VIORND_QUEUE_MASK; + aidx = avail->idx & vq_info->mask; + uidx = used->idx & vq_info->mask; - dxx = avail->ring[aidx] & VIORND_QUEUE_MASK; + dxx = avail->ring[aidx] & vq_info->mask; sz = desc[dxx].len; if (sz > MAXPHYS) fatalx("viornd descriptor size too large (%zu)", sz); rnd_data = malloc(sz); + if (rnd_data == NULL) + fatal("memory allocation error for viornd data"); - if (rnd_data != NULL) { - arc4random_buf(rnd_data, sz); - if (write_mem(desc[dxx].addr, rnd_data, sz)) { - log_warnx("viornd: can't write random data @ " - "0x%llx", - desc[dxx].addr); - } else { - /* ret == 1 -> interrupt needed */ - /* XXX check VIRTIO_F_NO_INTR */ - ret = 1; - viornd.cfg.isr_status = 1; - used->ring[uidx].id = dxx; - used->ring[uidx].len = sz; - __sync_synchronize(); - used->idx++; - } - free(rnd_data); - } else - fatal("memory allocation error for viornd data"); + arc4random_buf(rnd_data, sz); + if (write_mem(desc[dxx].addr, rnd_data, sz)) { + log_warnx("viornd: can't write random data @ 0x%llx", + desc[dxx].addr); + } else { + /* ret == 1 -> interrupt needed */ + /* XXX check VIRTIO_F_NO_INTR */ + ret = 1; + viornd.isr = 1; + used->ring[uidx].id = dxx; + used->ring[uidx].len = sz; + __sync_synchronize(); + used->idx++; + } + free(rnd_data); return (ret); } -int -virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, - void *unused, uint8_t sz) +static int +virtio_io_dispatch(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, + void *arg, uint8_t sz) { - *intr = 0xFF; + struct virtio_dev *dev = (struct virtio_dev *)arg; + uint8_t actual = (uint8_t)reg; - if (dir == 0) { + switch (reg & 0xFF00) { + case VIO1_CFG_BAR_OFFSET: + *data = virtio_io_cfg(dev, dir, actual, *data, sz); + break; + case VIO1_DEV_BAR_OFFSET: + if (dev->device_id == PCI_PRODUCT_VIRTIO_SCSI) + return vioscsi_io(dir, actual, data, intr, arg, sz); + else if (dir == VEI_DIR_IN) { + log_debug("%s: no device specific handler", __func__); + *data = (uint32_t)(-1); + } + break; + case VIO1_NOTIFY_BAR_OFFSET: + return virtio_io_notify(dir, actual, data, intr, arg, sz); + case VIO1_ISR_BAR_OFFSET: + return virtio_io_isr(dir, actual, data, intr, arg, sz); + default: + log_debug("%s: no handler for reg 0x%04x", __func__, reg); + if (dir == VEI_DIR_IN) + *data = (uint32_t)(-1); + } + return (0); +} + +/* + * virtio 1.x PCI config register io. If a register is read, returns the value. + * Otherwise returns 0.
+ */ +uint32_t +virtio_io_cfg(struct virtio_dev *dev, int dir, uint8_t reg, uint32_t data, + uint8_t sz) +{ + struct virtio_pci_common_cfg *pci_cfg = &dev->pci_cfg; + uint32_t res = 0; + uint16_t i; + + if (dir == VEI_DIR_OUT) { switch (reg) { - case VIRTIO_CONFIG_DEVICE_FEATURES: - case VIRTIO_CONFIG_QUEUE_SIZE: - case VIRTIO_CONFIG_ISR_STATUS: - log_warnx("%s: illegal write %x to %s", - __progname, *data, virtio_reg_name(reg)); + case VIO1_PCI_DEVICE_FEATURE_SELECT: + if (sz != 4) + log_warnx("%s: unaligned write to device " + "feature select (sz=%u)", __func__, sz); + else + pci_cfg->device_feature_select = data; break; - case VIRTIO_CONFIG_GUEST_FEATURES: - viornd.cfg.guest_feature = *data; + case VIO1_PCI_DEVICE_FEATURE: + log_warnx("%s: illegal write to device feature " + "register", __progname); break; - case VIRTIO_CONFIG_QUEUE_PFN: - viornd.cfg.queue_pfn = *data; - viornd_update_qa(); + case VIO1_PCI_DRIVER_FEATURE_SELECT: + if (sz != 4) + log_warnx("%s: unaligned write to driver " + "feature select register (sz=%u)", __func__, + sz); + else + pci_cfg->driver_feature_select = data; break; - case VIRTIO_CONFIG_QUEUE_SELECT: - viornd.cfg.queue_select = *data; - viornd_update_qs(); + case VIO1_PCI_DRIVER_FEATURE: + if (sz != 4) { + log_warnx("%s: unaligned write to driver " + "feature register (sz=%u)", __func__, sz); + break; + } + if (pci_cfg->driver_feature_select > 1) { + /* We only support a 64-bit feature space. */ + DPRINTF("%s: ignoring driver feature write", + __func__); + break; + } + pci_cfg->driver_feature = data; + if (pci_cfg->driver_feature_select == 0) + dev->driver_feature |= pci_cfg->driver_feature; + else + dev->driver_feature |= + ((uint64_t)pci_cfg->driver_feature << 32); + dev->driver_feature &= dev->device_feature; + DPRINTF("%s: driver features 0x%llx", __func__, + dev->driver_feature); break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - viornd.cfg.queue_notify = *data; - if (viornd_notifyq()) - *intr = 1; + case VIO1_PCI_CONFIG_MSIX_VECTOR: + /* Ignore until we support MSIX. */ break; - case VIRTIO_CONFIG_DEVICE_STATUS: - viornd.cfg.device_status = *data; + case VIO1_PCI_NUM_QUEUES: + log_warnx("%s: illegal write to num queues register", + __progname); break; + case VIO1_PCI_DEVICE_STATUS: + if (sz != 1) { + log_warnx("%s: unaligned write to device " + "status register (sz=%u)", __func__, sz); + break; + } + dev->status = data; + if (dev->status == 0) { + /* Reset device and virtqueues (if any). */ + dev->driver_feature = 0; + dev->isr = 0; + + pci_cfg->queue_select = 0; + virtio_update_qs(dev); + + if (dev->num_queues > 0) { + /* + * Reset virtqueues to initial state and + * set to disabled status. Clear PCI + * configuration registers. + */ + for (i = 0; i < dev->num_queues; i++) + virtio_vq_init(dev, i); + } + } + + DPRINTF("%s: dev %u status [%s%s%s%s%s%s]", __func__, + dev->pci_id, + (data & VIRTIO_CONFIG_DEVICE_STATUS_ACK) ? + "[ack]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER) ? + "[driver]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) ? + "[driver ok]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) ? + "[features ok]" : "", + (data & + VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) + ? "[needs reset]" : "", + (data & VIRTIO_CONFIG_DEVICE_STATUS_FAILED) ? 
+ "[failed]" : ""); + + break; + case VIO1_PCI_CONFIG_GENERATION: + log_warnx("%s: illegal write to config generation " + "register", __progname); + break; + case VIO1_PCI_QUEUE_SELECT: + pci_cfg->queue_select = data; + virtio_update_qs(dev); + break; + case VIO1_PCI_QUEUE_SIZE: + if (data <= VIRTIO_QUEUE_SIZE_MAX) + pci_cfg->queue_size = data; + else { + log_warnx("%s: clamping queue size", __func__); + pci_cfg->queue_size = VIRTIO_QUEUE_SIZE_MAX; + } + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_MSIX_VECTOR: + /* Ignore until we support MSI-X. */ + break; + case VIO1_PCI_QUEUE_ENABLE: + pci_cfg->queue_enable = data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_NOTIFY_OFF: + log_warnx("%s: illegal write to queue notify offset " + "register", __progname); + break; + case VIO1_PCI_QUEUE_DESC: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "desc. register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_desc &= 0xffffffff00000000; + pci_cfg->queue_desc |= (uint64_t)data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_DESC + 4: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "desc. register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_desc &= 0x00000000ffffffff; + pci_cfg->queue_desc |= ((uint64_t)data << 32); + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_AVAIL: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "available register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_avail &= 0xffffffff00000000; + pci_cfg->queue_avail |= (uint64_t)data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_AVAIL + 4: + if (sz != 4) { + log_warnx("%s: unaligned write to queue " + "available register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_avail &= 0x00000000ffffffff; + pci_cfg->queue_avail |= ((uint64_t)data << 32); + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_USED: + if (sz != 4) { + log_warnx("%s: unaligned write to queue used " + "register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_used &= 0xffffffff00000000; + pci_cfg->queue_used |= (uint64_t)data; + virtio_update_qa(dev); + break; + case VIO1_PCI_QUEUE_USED + 4: + if (sz != 4) { + log_warnx("%s: unaligned write to queue used " + "register (sz=%u)", __func__, sz); + break; + } + pci_cfg->queue_used &= 0x00000000ffffffff; + pci_cfg->queue_used |= ((uint64_t)data << 32); + virtio_update_qa(dev); + break; + default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); } } else { switch (reg) { - case VIRTIO_CONFIG_DEVICE_FEATURES: - *data = viornd.cfg.device_feature; + case VIO1_PCI_DEVICE_FEATURE_SELECT: + res = pci_cfg->device_feature_select; break; - case VIRTIO_CONFIG_GUEST_FEATURES: - *data = viornd.cfg.guest_feature; + case VIO1_PCI_DEVICE_FEATURE: + if (pci_cfg->device_feature_select == 0) + res = dev->device_feature & (uint32_t)(-1); + else if (pci_cfg->device_feature_select == 1) + res = dev->device_feature >> 32; + else { + DPRINTF("%s: ignoring device feature read", + __func__); + } break; - case VIRTIO_CONFIG_QUEUE_PFN: - *data = viornd.cfg.queue_pfn; + case VIO1_PCI_DRIVER_FEATURE_SELECT: + res = pci_cfg->driver_feature_select; break; - case VIRTIO_CONFIG_QUEUE_SIZE: - *data = viornd.cfg.queue_size; + case VIO1_PCI_DRIVER_FEATURE: + if (pci_cfg->driver_feature_select == 0) + res = dev->driver_feature & (uint32_t)(-1); + else if (pci_cfg->driver_feature_select == 1) + res = dev->driver_feature >> 32; + else { + DPRINTF("%s: ignoring driver feature read", + __func__); + } break; - case 
VIRTIO_CONFIG_QUEUE_SELECT: - *data = viornd.cfg.queue_select; + case VIO1_PCI_CONFIG_MSIX_VECTOR: + res = VIRTIO_MSI_NO_VECTOR; /* Unsupported */ break; - case VIRTIO_CONFIG_QUEUE_NOTIFY: - *data = viornd.cfg.queue_notify; + case VIO1_PCI_NUM_QUEUES: + res = dev->num_queues; break; - case VIRTIO_CONFIG_DEVICE_STATUS: - *data = viornd.cfg.device_status; + case VIO1_PCI_DEVICE_STATUS: + res = dev->status; break; - case VIRTIO_CONFIG_ISR_STATUS: - *data = viornd.cfg.isr_status; - viornd.cfg.isr_status = 0; - vcpu_deassert_irq(viornd.vm_id, 0, viornd.irq); + case VIO1_PCI_CONFIG_GENERATION: + res = pci_cfg->config_generation; break; + case VIO1_PCI_QUEUE_SELECT: + res = pci_cfg->queue_select; + break; + case VIO1_PCI_QUEUE_SIZE: + res = pci_cfg->queue_size; + break; + case VIO1_PCI_QUEUE_MSIX_VECTOR: + res = VIRTIO_MSI_NO_VECTOR; /* Unsupported */ + break; + case VIO1_PCI_QUEUE_ENABLE: + res = pci_cfg->queue_enable; + break; + case VIO1_PCI_QUEUE_NOTIFY_OFF: + res = pci_cfg->queue_notify_off; + break; + case VIO1_PCI_QUEUE_DESC: + res = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_desc); + break; + case VIO1_PCI_QUEUE_DESC + 4: + res = (uint32_t)(pci_cfg->queue_desc >> 32); + break; + case VIO1_PCI_QUEUE_AVAIL: + res = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_avail); + break; + case VIO1_PCI_QUEUE_AVAIL + 4: + res = (uint32_t)(pci_cfg->queue_avail >> 32); + break; + case VIO1_PCI_QUEUE_USED: + res = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_used); + break; + case VIO1_PCI_QUEUE_USED + 4: + res = (uint32_t)(pci_cfg->queue_used >> 32); + break; + default: + log_warnx("%s: invalid register 0x%04x", __func__, reg); } } + + DPRINTF("%s: dev=%u %s sz=%u dir=%s data=0x%04x", __func__, dev->pci_id, + virtio1_reg_name(reg), sz, (dir == VEI_DIR_OUT) ? "w" : "r", + (dir == VEI_DIR_OUT) ? data : res); + + return (res); +} + +static int +virtio_io_isr(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, + void *arg, uint8_t sz) +{ + struct virtio_dev *dev = (struct virtio_dev *)arg; + *intr = 0xFF; + + DPRINTF("%s: dev=%u, reg=0x%04x, sz=%u, dir=%s", __func__, + dev->pci_id, reg, sz, + (dir == VEI_DIR_OUT) ? "write" : "read"); + + /* Limit to in-process devices. */ + if (dev->device_id == PCI_PRODUCT_VIRTIO_BLOCK || + dev->device_id == PCI_PRODUCT_VIRTIO_NETWORK) + fatalx("%s: cannot use on multi-process virtio dev", __func__); + + if (dir == VEI_DIR_IN) { + *data = dev->isr; + dev->isr = 0; + vcpu_deassert_irq(dev->vm_id, 0, dev->irq); + } + return (0); } +static int +virtio_io_notify(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, + void *arg, uint8_t sz) +{ + int raise_intr = 0; + struct virtio_dev *dev = (struct virtio_dev *)arg; + uint16_t vq_idx = (uint16_t)(0x0000ffff & *data); + + *intr = 0xFF; + + DPRINTF("%s: reg=0x%04x, sz=%u, vq_idx=%u, dir=%s", __func__, reg, sz, + vq_idx, (dir == VEI_DIR_OUT) ? "write" : "read"); + + /* Limit this handler to in-process devices */ + if (dev->device_id == PCI_PRODUCT_VIRTIO_BLOCK || + dev->device_id == PCI_PRODUCT_VIRTIO_NETWORK) + fatalx("%s: cannot use on multi-process virtio dev", __func__); + + if (vq_idx >= dev->num_queues) { + log_warnx("%s: invalid virtqueue index %u", __func__, vq_idx); + return (0); + } + + if (dir == VEI_DIR_IN) { + /* Behavior is undefined. 
*/ + *data = (uint32_t)(-1); + return (0); + } + + switch (dev->device_id) { + case PCI_PRODUCT_VIRTIO_ENTROPY: + raise_intr = viornd_notifyq(dev, vq_idx); + break; + case PCI_PRODUCT_VIRTIO_SCSI: + raise_intr = vioscsi_notifyq(dev, vq_idx); + break; + case PCI_PRODUCT_VIRTIO_VMMCI: + /* Does not use a virtqueue. */ + break; + default: + log_warnx("%s: invalid device type %u", __func__, + dev->device_id); + } + + if (raise_intr) + *intr = 1; + + return (0); +} + /* * vmmci_ctl * @@ -284,20 +716,24 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u * Called by the vm process's event(3) loop. */ int -vmmci_ctl(unsigned int cmd) +vmmci_ctl(struct virtio_dev *dev, unsigned int cmd) { int ret = 0; struct timeval tv = { 0, 0 }; + struct vmmci_dev *v = NULL; - mutex_lock(&vmmci.mutex); + if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI) + fatalx("%s: device is not a vmmci device", __func__); + v = &dev->vmmci; - if ((vmmci.cfg.device_status & - VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) { + mutex_lock(&v->mutex); + + if ((dev->status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) { ret = -1; goto unlock; } - if (cmd == vmmci.cmd) + if (cmd == v->cmd) goto unlock; switch (cmd) { @@ -306,7 +742,7 @@ vmmci_ctl(unsigned int cmd) case VMMCI_SHUTDOWN: case VMMCI_REBOOT: /* Update command */ - vmmci.cmd = cmd; + v->cmd = cmd; /* * vmm VMs do not support powerdown, send a reboot request @@ -316,20 +752,20 @@ vmmci_ctl(unsigned int cmd) cmd = VMMCI_REBOOT; /* Trigger interrupt */ - vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; - vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq); + dev->isr = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; + vcpu_assert_irq(dev->vm_id, 0, dev->irq); /* Add ACK timeout */ tv.tv_sec = VMMCI_TIMEOUT_SHORT; - evtimer_add(&vmmci.timeout, &tv); + evtimer_add(&v->timeout, &tv); break; case VMMCI_SYNCRTC: if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) { /* RTC updated, request guest VM resync of its RTC */ - vmmci.cmd = cmd; + v->cmd = cmd; - vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; - vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq); + dev->isr = VIRTIO_CONFIG_ISR_CONFIG_CHANGE; + vcpu_assert_irq(dev->vm_id, 0, dev->irq); } else { log_debug("%s: RTC sync skipped (guest does not " "support RTC sync)\n", __func__); @@ -340,7 +776,7 @@ vmmci_ctl(unsigned int cmd) } unlock: - mutex_unlock(&vmmci.mutex); + mutex_unlock(&v->mutex); return (ret); } @@ -352,9 +788,15 @@ unlock: * * Called by the vcpu thread. Must be called with the mutex held. */ -void -vmmci_ack(unsigned int cmd) +static void +vmmci_ack(struct virtio_dev *dev, unsigned int cmd) { + struct vmmci_dev *v = NULL; + + if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI) + fatalx("%s: device is not a vmmci device", __func__); + v = &dev->vmmci; + switch (cmd) { case VMMCI_NONE: break; @@ -365,10 +807,10 @@ vmmci_ack(unsigned int cmd) * timeout to give the VM a chance to reboot before the * timer is expired. */ - if (vmmci.cmd == 0) { + if (v->cmd == 0) { log_debug("%s: vm %u requested shutdown", __func__, - vmmci.vm_id); - vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_SHORT); + dev->vm_id); + vm_pipe_send(&v->dev_pipe, VMMCI_SET_TIMEOUT_SHORT); return; } /* FALLTHROUGH */ @@ -380,16 +822,16 @@ vmmci_ack(unsigned int cmd) * rc.shutdown on the VM), so increase the timeout before * killing it forcefully. 
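+		 *
+		 * Timeline sketch (constants from virtio.h): vmmci_ctl()
+		 * arms a VMMCI_TIMEOUT_SHORT (3s) timer when sending the
+		 * request; this ACK re-arms it to VMMCI_TIMEOUT_LONG (120s)
+		 * via the device pipe; if the timer ever fires,
+		 * vmmci_timeout() forces the issue via vm_shutdown().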
*/ - if (cmd == vmmci.cmd) { + if (cmd == v->cmd) { log_debug("%s: vm %u acknowledged shutdown request", - __func__, vmmci.vm_id); - vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_LONG); + __func__, dev->vm_id); + vm_pipe_send(&v->dev_pipe, VMMCI_SET_TIMEOUT_LONG); } break; case VMMCI_SYNCRTC: log_debug("%s: vm %u acknowledged RTC sync request", - __func__, vmmci.vm_id); - vmmci.cmd = VMMCI_NONE; + __func__, dev->vm_id); + v->cmd = VMMCI_NONE; break; default: log_warnx("%s: illegal request %u", __func__, cmd); @@ -400,17 +842,32 @@ vmmci_ack(unsigned int cmd) void vmmci_timeout(int fd, short type, void *arg) { - log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id); - vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN); + struct virtio_dev *dev = (struct virtio_dev *)arg; + struct vmmci_dev *v = NULL; + + if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI) + fatalx("%s: device is not a vmmci device", __func__); + v = &dev->vmmci; + + log_debug("%s: vm %u shutdown", __progname, dev->vm_id); + vm_shutdown(v->cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN); } int vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, - void *unused, uint8_t sz) + void *arg, uint8_t sz) { + struct virtio_dev *dev = (struct virtio_dev *)arg; + struct vmmci_dev *v = NULL; + + if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI) + fatalx("%s: device is not a vmmci device (%u)", + __func__, dev->device_id); + v = &dev->vmmci; + *intr = 0xFF; - mutex_lock(&vmmci.mutex); + mutex_lock(&v->mutex); if (dir == 0) { switch (reg) { case VIRTIO_CONFIG_DEVICE_FEATURES: @@ -420,72 +877,72 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_ __progname, *data, virtio_reg_name(reg)); break; case VIRTIO_CONFIG_GUEST_FEATURES: - vmmci.cfg.guest_feature = *data; + dev->cfg.guest_feature = *data; break; case VIRTIO_CONFIG_QUEUE_PFN: - vmmci.cfg.queue_pfn = *data; + dev->cfg.queue_pfn = *data; break; case VIRTIO_CONFIG_QUEUE_SELECT: - vmmci.cfg.queue_select = *data; + dev->cfg.queue_select = *data; break; case VIRTIO_CONFIG_QUEUE_NOTIFY: - vmmci.cfg.queue_notify = *data; + dev->cfg.queue_notify = *data; break; case VIRTIO_CONFIG_DEVICE_STATUS: - vmmci.cfg.device_status = *data; + dev->status = *data; break; case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: - vmmci_ack(*data); + vmmci_ack(dev, *data); break; } } else { switch (reg) { case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: - *data = vmmci.cmd; + *data = v->cmd; break; case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: /* Update time once when reading the first register */ - gettimeofday(&vmmci.time, NULL); - *data = (uint64_t)vmmci.time.tv_sec; + gettimeofday(&v->time, NULL); + *data = (uint64_t)v->time.tv_sec; break; case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: - *data = (uint64_t)vmmci.time.tv_sec << 32; + *data = (uint64_t)v->time.tv_sec << 32; break; case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: - *data = (uint64_t)vmmci.time.tv_usec; + *data = (uint64_t)v->time.tv_usec; break; case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: - *data = (uint64_t)vmmci.time.tv_usec << 32; + *data = (uint64_t)v->time.tv_usec << 32; break; case VIRTIO_CONFIG_DEVICE_FEATURES: - *data = vmmci.cfg.device_feature; + *data = dev->cfg.device_feature; break; case VIRTIO_CONFIG_GUEST_FEATURES: - *data = vmmci.cfg.guest_feature; + *data = dev->cfg.guest_feature; break; case VIRTIO_CONFIG_QUEUE_PFN: - *data = vmmci.cfg.queue_pfn; + *data = dev->cfg.queue_pfn; break; case VIRTIO_CONFIG_QUEUE_SIZE: - *data = vmmci.cfg.queue_size; + *data = dev->cfg.queue_size; break; case VIRTIO_CONFIG_QUEUE_SELECT: - 
*data = vmmci.cfg.queue_select; + *data = dev->cfg.queue_select; break; case VIRTIO_CONFIG_QUEUE_NOTIFY: - *data = vmmci.cfg.queue_notify; + *data = dev->cfg.queue_notify; break; case VIRTIO_CONFIG_DEVICE_STATUS: - *data = vmmci.cfg.device_status; + *data = dev->status; break; case VIRTIO_CONFIG_ISR_STATUS: - *data = vmmci.cfg.isr_status; - vmmci.cfg.isr_status = 0; - vcpu_deassert_irq(vmmci.vm_id, 0, vmmci.irq); + *data = dev->isr; + dev->isr = 0; + vcpu_deassert_irq(dev->vm_id, 0, dev->irq); break; } } - mutex_unlock(&vmmci.mutex); + mutex_unlock(&v->mutex); return (0); } @@ -506,18 +963,20 @@ virtio_get_base(int fd, char *path, size_t npath, int static void vmmci_pipe_dispatch(int fd, short event, void *arg) { - enum pipe_msg_type msg; - struct timeval tv = { 0, 0 }; + struct virtio_dev *dev = (struct virtio_dev *)arg; + struct vmmci_dev *v = &dev->vmmci; + struct timeval tv = { 0, 0 }; + enum pipe_msg_type msg; - msg = vm_pipe_recv(&vmmci.dev_pipe); + msg = vm_pipe_recv(&v->dev_pipe); switch (msg) { case VMMCI_SET_TIMEOUT_SHORT: tv.tv_sec = VMMCI_TIMEOUT_SHORT; - evtimer_add(&vmmci.timeout, &tv); + evtimer_add(&v->timeout, &tv); break; case VMMCI_SET_TIMEOUT_LONG: tv.tv_sec = VMMCI_TIMEOUT_LONG; - evtimer_add(&vmmci.timeout, &tv); + evtimer_add(&v->timeout, &tv); break; default: log_warnx("%s: invalid pipe message type %d", __func__, msg); @@ -531,98 +990,78 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, struct vmop_create_params *vmc = &vm->vm_params; struct vm_create_params *vcp = &vmc->vmc_params; struct virtio_dev *dev; - uint8_t id; - uint8_t i, j; - int ret = 0; + uint8_t id, i, j; + int bar_id, ret = 0; - /* Virtio entropy device */ + SLIST_INIT(&virtio_devs); + + /* Virtio 1.x Entropy Device */ if (pci_add_device(&id, PCI_VENDOR_QUMRANET, - PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM, - PCI_SUBCLASS_SYSTEM_MISC, - PCI_VENDOR_OPENBSD, - PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) { + PCI_PRODUCT_QUMRANET_VIO1_RNG, PCI_CLASS_SYSTEM, + PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD, + PCI_PRODUCT_VIRTIO_ENTROPY, 1, 1, NULL)) { log_warnx("%s: can't add PCI virtio rng device", __progname); return; } + virtio_dev_init(&viornd, id, VIORND_QUEUE_SIZE_DEFAULT, + VIRTIO_RND_QUEUES, VIRTIO_F_VERSION_1, vcp->vcp_id); - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) { + bar_id = pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_io_dispatch, + &viornd); + if (bar_id == -1 || bar_id > 0xff) { log_warnx("%s: can't add bar for virtio rng device", __progname); return; } + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG, bar_id, 0); + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id, 0); + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG, bar_id, 0); - memset(&viornd, 0, sizeof(viornd)); - viornd.vq[0].qs = VIORND_QUEUE_SIZE; - viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) * - VIORND_QUEUE_SIZE; - viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( - sizeof(struct vring_desc) * VIORND_QUEUE_SIZE - + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE)); - viornd.pci_id = id; - viornd.irq = pci_get_dev_irq(id); - viornd.vm_id = vcp->vcp_id; - - SLIST_INIT(&virtio_devs); - + /* Virtio 1.x Network Devices */ if (vmc->vmc_nnics > 0) { for (i = 0; i < vmc->vmc_nnics; i++) { - dev = calloc(1, sizeof(struct virtio_dev)); + dev = malloc(sizeof(struct virtio_dev)); if (dev == NULL) { log_warn("%s: calloc failure allocating vionet", __progname); return; } - /* Virtio network */ - dev->dev_type = VMD_DEVTYPE_NET; - if (pci_add_device(&id, PCI_VENDOR_QUMRANET, - PCI_PRODUCT_QUMRANET_VIO_NET, 
PCI_CLASS_SYSTEM,
+			    PCI_PRODUCT_QUMRANET_VIO1_NET, PCI_CLASS_SYSTEM,
 			    PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
-			    PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
+			    PCI_PRODUCT_VIRTIO_NETWORK, 1, 1, NULL)) {
 				log_warnx("%s: can't add PCI virtio net device",
 				    __progname);
 				return;
 			}
-			dev->pci_id = id;
-			dev->sync_fd = -1;
-			dev->async_fd = -1;
-			dev->vm_id = vcp->vcp_id;
-			dev->vm_vmid = vm->vm_vmid;
-			dev->irq = pci_get_dev_irq(id);
+			virtio_dev_init(dev, id, VIONET_QUEUE_SIZE_DEFAULT,
+			    VIRTIO_NET_QUEUES,
+			    (VIRTIO_NET_F_MAC | VIRTIO_F_VERSION_1),
+			    vcp->vcp_id);
 
-			/* The vionet pci bar function is called by the vcpu. */
-			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
-			    dev)) {
+			bar_id = pci_add_bar(id, PCI_MAPREG_TYPE_IO,
+			    virtio_pci_io, dev);
+			if (bar_id == -1 || bar_id > 0xff) {
 				log_warnx("%s: can't add bar for virtio net "
 				    "device", __progname);
 				return;
			}
+			virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG,
+			    bar_id, 0);
+			virtio_pci_add_cap(id, VIRTIO_PCI_CAP_DEVICE_CFG,
+			    bar_id, 8);
+			virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id,
+			    0);
+			virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG,
+			    bar_id, 0);
 
-			dev->vionet.vq[RXQ].qs = VIONET_QUEUE_SIZE;
-			dev->vionet.vq[RXQ].vq_availoffset =
-			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
-			dev->vionet.vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
-			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
-			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
-			dev->vionet.vq[RXQ].last_avail = 0;
-			dev->vionet.vq[RXQ].notified_avail = 0;
-
-			dev->vionet.vq[TXQ].qs = VIONET_QUEUE_SIZE;
-			dev->vionet.vq[TXQ].vq_availoffset =
-			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
-			dev->vionet.vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
-			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
-			    + sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
-			dev->vionet.vq[TXQ].last_avail = 0;
-			dev->vionet.vq[TXQ].notified_avail = 0;
-
+			/* Device specific initialization. */
+			dev->dev_type = VMD_DEVTYPE_NET;
+			dev->vm_vmid = vm->vm_vmid;
 			dev->vionet.data_fd = child_taps[i];
 
 			/* MAC address has been assigned by the parent */
 			memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
-			dev->vionet.cfg.device_feature = VIRTIO_NET_F_MAC;
-
 			dev->vionet.lockedmac =
 			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 
1 : 0; dev->vionet.local = @@ -645,52 +1084,50 @@ virtio_init(struct vmd_vm *vm, int child_cdrom, } } + /* Virtio 1.x Block Devices */ if (vmc->vmc_ndisks > 0) { for (i = 0; i < vmc->vmc_ndisks; i++) { - dev = calloc(1, sizeof(struct virtio_dev)); + dev = malloc(sizeof(struct virtio_dev)); if (dev == NULL) { - log_warn("%s: calloc failure allocating vioblk", - __progname); + log_warn("%s: failure allocating vioblk", + __func__); return; } - - /* One vioblk device for each disk defined in vcp */ - dev->dev_type = VMD_DEVTYPE_DISK; - if (pci_add_device(&id, PCI_VENDOR_QUMRANET, - PCI_PRODUCT_QUMRANET_VIO_BLOCK, + PCI_PRODUCT_QUMRANET_VIO1_BLOCK, PCI_CLASS_MASS_STORAGE, - PCI_SUBCLASS_MASS_STORAGE_SCSI, - PCI_VENDOR_OPENBSD, - PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) { + PCI_SUBCLASS_MASS_STORAGE_SCSI, PCI_VENDOR_OPENBSD, + PCI_PRODUCT_VIRTIO_BLOCK, 1, 1, NULL)) { log_warnx("%s: can't add PCI virtio block " "device", __progname); return; } - dev->pci_id = id; - dev->sync_fd = -1; - dev->async_fd = -1; - dev->vm_id = vcp->vcp_id; - dev->vm_vmid = vm->vm_vmid; - dev->irq = pci_get_dev_irq(id); + virtio_dev_init(dev, id, VIOBLK_QUEUE_SIZE_DEFAULT, + VIRTIO_BLK_QUEUES, + (VIRTIO_F_VERSION_1 | VIRTIO_BLK_F_SEG_MAX), + vcp->vcp_id); - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io, - &dev->vioblk)) { + bar_id = pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io, + dev); + if (bar_id == -1 || bar_id > 0xff) { log_warnx("%s: can't add bar for virtio block " "device", __progname); return; } - dev->vioblk.vq[0].qs = VIOBLK_QUEUE_SIZE; - dev->vioblk.vq[0].vq_availoffset = - sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE; - dev->vioblk.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN( - sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE - + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE)); - dev->vioblk.vq[0].last_avail = 0; - dev->vioblk.cfg.device_feature = - VIRTIO_BLK_F_SEG_MAX; - dev->vioblk.seg_max = VIOBLK_SEG_MAX; + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG, + bar_id, 0); + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_DEVICE_CFG, + bar_id, 24); + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id, + 0); + virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG, + bar_id, 0); + /* Device specific initialization. */ + dev->dev_type = VMD_DEVTYPE_DISK; + dev->vm_vmid = vm->vm_vmid; + dev->vioblk.seg_max = VIOBLK_SEG_MAX_DEFAULT; + /* * Initialize disk fds to an invalid fd (-1), then * set any child disk fds. 
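The capability setup above maps all four virtio 1.x structures into a
single IO BAR at fixed offsets, with virtio_io_dispatch() (or the
device process) routing on (reg & 0xFF00). Roughly, per the
VIO1_*_BAR_OFFSET constants in virtio.h (the device config length
varies per device: 8 for net, 24 for blk, 36 for scsi):

	0x000	common config	(struct virtio_pci_common_cfg)
	0x100	notify		(uint16_t virtqueue index)
	0x200	ISR status	(uint8_t, cleared on read)
	0x300	device config	(device-specific registers)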
@@ -714,89 +1151,74 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
 			fatalx("failed to launch virtio device");
 	}
 
-	/* vioscsi cdrom */
+	/* Virtio 1.x SCSI CD-ROM */
 	if (strlen(vmc->vmc_cdrom)) {
-		vioscsi = calloc(1, sizeof(struct vioscsi_dev));
-		if (vioscsi == NULL) {
-			log_warn("%s: calloc failure allocating vioscsi",
+		dev = malloc(sizeof(struct virtio_dev));
+		if (dev == NULL) {
+			log_warn("%s: malloc failure allocating vioscsi",
 			    __progname);
 			return;
 		}
-
 		if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
-		    PCI_PRODUCT_QUMRANET_VIO_SCSI,
-		    PCI_CLASS_MASS_STORAGE,
-		    PCI_SUBCLASS_MASS_STORAGE_SCSI,
-		    PCI_VENDOR_OPENBSD,
-		    PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
+		    PCI_PRODUCT_QUMRANET_VIO1_SCSI, PCI_CLASS_MASS_STORAGE,
+		    PCI_SUBCLASS_MASS_STORAGE_SCSI, PCI_VENDOR_OPENBSD,
+		    PCI_PRODUCT_VIRTIO_SCSI, 1, 1, NULL)) {
 			log_warnx("%s: can't add PCI vioscsi device",
 			    __progname);
 			return;
 		}
-
-		if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
+		virtio_dev_init(dev, id, VIOSCSI_QUEUE_SIZE_DEFAULT,
+		    VIRTIO_SCSI_QUEUES, VIRTIO_F_VERSION_1, vcp->vcp_id);
+		bar_id = pci_add_bar(id, PCI_MAPREG_TYPE_IO,
+		    virtio_io_dispatch, dev);
+		if (bar_id == -1 || bar_id > 0xff) {
 			log_warnx("%s: can't add bar for vioscsi device",
 			    __progname);
 			return;
 		}
+		virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG, bar_id, 0);
+		virtio_pci_add_cap(id, VIRTIO_PCI_CAP_DEVICE_CFG, bar_id, 36);
+		virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id, 0);
+		virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG, bar_id, 0);
 
-		for (i = 0; i < VIRTIO_MAX_QUEUES; i++) {
-			vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE;
-			vioscsi->vq[i].vq_availoffset =
-			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE;
-			vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN(
-			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE
-			    + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE));
-			vioscsi->vq[i].last_avail = 0;
-		}
-		if (virtio_raw_init(&vioscsi->file, &vioscsi->sz, &child_cdrom,
-		    1) == -1) {
+		/* Device specific initialization. 
*/
+		if (virtio_raw_init(&dev->vioscsi.file, &dev->vioscsi.sz,
+		    &child_cdrom, 1) == -1) {
 			log_warnx("%s: unable to determine iso format",
 			    __func__);
 			return;
 		}
-		vioscsi->locked = 0;
-		vioscsi->lba = 0;
-		vioscsi->n_blocks = vioscsi->sz / VIOSCSI_BLOCK_SIZE_CDROM;
-		vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
-		vioscsi->pci_id = id;
-		vioscsi->vm_id = vcp->vcp_id;
-		vioscsi->irq = pci_get_dev_irq(id);
+		dev->vioscsi.locked = 0;
+		dev->vioscsi.lba = 0;
+		dev->vioscsi.n_blocks = dev->vioscsi.sz /
+		    VIOSCSI_BLOCK_SIZE_CDROM;
+		dev->vioscsi.max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
 	}
 
-	/* virtio control device */
-	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
-	    PCI_PRODUCT_OPENBSD_CONTROL,
-	    PCI_CLASS_COMMUNICATIONS,
-	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
-	    PCI_VENDOR_OPENBSD,
-	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
+	/* Virtio 0.9 VMM Control Interface */
+	dev = &vmmci;
+	if (pci_add_device(&id, PCI_VENDOR_OPENBSD, PCI_PRODUCT_OPENBSD_CONTROL,
+	    PCI_CLASS_COMMUNICATIONS, PCI_SUBCLASS_COMMUNICATIONS_MISC,
+	    PCI_VENDOR_OPENBSD, PCI_PRODUCT_VIRTIO_VMMCI, 0, 1, NULL)) {
 		log_warnx("%s: can't add PCI vmm control device",
 		    __progname);
 		return;
 	}
-
-	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
+	virtio_dev_init(dev, id, 0, 0,
+	    VMMCI_F_TIMESYNC | VMMCI_F_ACK | VMMCI_F_SYNCRTC, vcp->vcp_id);
+	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, dev) == -1) {
 		log_warnx("%s: can't add bar for vmm control device",
 		    __progname);
 		return;
 	}
 
-	memset(&vmmci, 0, sizeof(vmmci));
-	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
-	    VMMCI_F_SYNCRTC;
-	vmmci.vm_id = vcp->vcp_id;
-	vmmci.irq = pci_get_dev_irq(id);
-	vmmci.pci_id = id;
-	ret = pthread_mutex_init(&vmmci.mutex, NULL);
+	ret = pthread_mutex_init(&dev->vmmci.mutex, NULL);
 	if (ret) {
 		errno = ret;
 		fatal("could not initialize vmmci mutex");
 	}
-
-	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
-	vm_pipe_init(&vmmci.dev_pipe, vmmci_pipe_dispatch);
-	event_add(&vmmci.dev_pipe.read_ev, NULL);
+	evtimer_set(&dev->vmmci.timeout, vmmci_timeout, dev);
+	vm_pipe_init2(&dev->vmmci.dev_pipe, vmmci_pipe_dispatch, dev);
+	event_add(&dev->vmmci.dev_pipe.read_ev, NULL);
 }
 
 /*
@@ -855,7 +1277,7 @@ virtio_shutdown(struct vmd_vm *vm)
 
 	/* Ensure that our disks are synced. */
 	if (vioscsi != NULL)
-		vioscsi->file.close(vioscsi->file.p, 0);
+		vioscsi->vioscsi.file.close(vioscsi->vioscsi.file.p, 0);
 
 	/*
 	 * Broadcast shutdown to child devices. We need to do this
@@ -923,6 +1345,125 @@ virtio_start(struct vmd_vm *vm)
 }
 
 /*
+ * Initialize a new virtio device structure. 
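+ *
+ * For example, the entropy device above is set up with:
+ *
+ *	virtio_dev_init(&viornd, id, VIORND_QUEUE_SIZE_DEFAULT,
+ *	    VIRTIO_RND_QUEUES, VIRTIO_F_VERSION_1, vcp->vcp_id);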
+ */ +static void +virtio_dev_init(struct virtio_dev *dev, uint8_t pci_id, uint16_t queue_size, + uint16_t num_queues, uint64_t features, uint32_t vm_id) +{ + size_t i; + uint16_t device_id; + + if (num_queues > 0 && num_queues > VIRTIO_MAX_QUEUES) + fatalx("%s: num_queues too large", __func__); + + device_id = pci_get_subsys_id(pci_id); + if (!device_id) + fatalx("%s: invalid pci device id %u", __func__, pci_id); + + memset(dev, 0, sizeof(*dev)); + + dev->pci_id = pci_id; + dev->device_id = device_id; + dev->irq = pci_get_dev_irq(pci_id); + dev->isr = 0; + dev->vm_id = vm_id; + + dev->device_feature = features; + + dev->pci_cfg.config_generation = 0; + dev->cfg.device_feature = features; + + dev->num_queues = num_queues; + dev->queue_size = queue_size; + dev->cfg.queue_size = queue_size; + + dev->async_fd = -1; + dev->sync_fd = -1; + + if (num_queues > 0) { + for (i = 0; i < num_queues; i++) + virtio_vq_init(dev, i); + } +} + +void +virtio_vq_init(struct virtio_dev *dev, size_t idx) +{ + struct virtio_vq_info *vq_info = NULL; + int v1 = (dev->device_feature & VIRTIO_F_VERSION_1) ? 1 : 0; + + if (idx >= dev->num_queues) + fatalx("%s: invalid virtqueue index", __func__); + vq_info = &dev->vq[idx]; + + vq_info->q_gpa = 0; + vq_info->qs = dev->queue_size; + vq_info->mask = dev->queue_size - 1; + + if (v1) { + vq_info->vq_enabled = 0; + vq_info->vq_availoffset = 0; + vq_info->vq_usedoffset = 0; + } else { + /* Always enable on pre-1.0 virtio devices. */ + vq_info->vq_enabled = 1; + vq_info->vq_availoffset = + sizeof(struct vring_desc) * vq_info->qs; + vq_info->vq_usedoffset = VIRTQUEUE_ALIGN( + sizeof(struct vring_desc) * vq_info->qs + + sizeof(uint16_t) * (2 + vq_info->qs)); + } + + vq_info->last_avail = 0; + vq_info->notified_avail = 0; +} + + +static void +virtio_pci_add_cap(uint8_t pci_id, uint8_t cfg_type, uint8_t bar_id, + uint32_t dev_cfg_len) +{ + struct virtio_pci_common_cap cap; + + memset(&cap, 0, sizeof(cap)); + + cap.virtio.cap_vndr = PCI_CAP_VENDSPEC; + cap.virtio.cap_len = sizeof(struct virtio_pci_cap); + cap.virtio.bar = bar_id; + cap.virtio.cfg_type = cfg_type; + + switch (cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + cap.virtio.offset = VIO1_CFG_BAR_OFFSET; + cap.virtio.length = sizeof(struct virtio_pci_common_cfg); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + /* XXX maybe inspect the virtio device and lookup the len. */ + cap.virtio.offset = VIO1_DEV_BAR_OFFSET; + cap.virtio.length = dev_cfg_len; + break; + case VIRTIO_PCI_CAP_ISR_CFG: + cap.virtio.offset = VIO1_ISR_BAR_OFFSET; + cap.virtio.length = sizeof(uint8_t); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + cap.virtio.offset = VIO1_NOTIFY_BAR_OFFSET; + cap.virtio.length = sizeof(uint16_t); + cap.notify.notify_off_multiplier = 0; + break; + default: + fatalx("%s: invalid pci capability config type %u", __func__, + cfg_type); + } + + if (pci_add_capability(pci_id, &cap.pci) == -1) { + fatalx("%s: can't add capability for virtio pci device %u", + __func__, pci_id); + } +} + +/* * Fork+exec a child virtio device. Returns 0 on success. 
*/ static int @@ -1207,14 +1748,13 @@ static int handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev) { uint32_t vm_id = gdev->vm_id; - int irq = gdev->irq; switch (msg->type) { case VIODEV_MSG_KICK: if (msg->state == INTR_STATE_ASSERT) - vcpu_assert_irq(vm_id, msg->vcpu, irq); + vcpu_assert_irq(vm_id, msg->vcpu, msg->irq); else if (msg->state == INTR_STATE_DEASSERT) - vcpu_deassert_irq(vm_id, msg->vcpu, irq); + vcpu_deassert_irq(vm_id, msg->vcpu, msg->irq); break; case VIODEV_MSG_READY: log_debug("%s: device reports ready", __func__); @@ -1305,10 +1845,8 @@ virtio_pci_io(int dir, uint16_t reg, uint32_t *data, u imsg_free(&imsg); if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) { -#if DEBUG - log_debug("%s: got sync read response (reg=%s)", - __func__, virtio_reg_name(msg.reg)); -#endif /* DEBUG */ + DPRINTF("%s: got sync read response (reg=%s)", __func__, + virtio_reg_name(msg.reg)); *data = msg.data; /* * It's possible we're asked to {de,}assert after the blob - 4bd6f68b41fcb8ce68781903e4364e543adc0075 blob + 78c235952af5d461088f2ebdbfb3e4b6766bb12d --- usr.sbin/vmd/virtio.h +++ usr.sbin/vmd/virtio.h @@ -19,11 +19,13 @@ #include #include +#include #include #include #include "vmd.h" +#include "pci.h" #ifndef _VIRTIO_H_ #define _VIRTIO_H_ @@ -33,20 +35,53 @@ #define ALIGNSZ(sz, align) ((sz + align - 1) & ~(align - 1)) #define MIN(a,b) (((a)<(b))?(a):(b)) +#define VIO1_PCI_DEVICE_FEATURE_SELECT \ + (offsetof(struct virtio_pci_common_cfg, device_feature_select)) +#define VIO1_PCI_DEVICE_FEATURE \ + (offsetof(struct virtio_pci_common_cfg, device_feature)) +#define VIO1_PCI_DRIVER_FEATURE_SELECT \ + (offsetof(struct virtio_pci_common_cfg, driver_feature_select)) +#define VIO1_PCI_DRIVER_FEATURE \ + (offsetof(struct virtio_pci_common_cfg, driver_feature)) +#define VIO1_PCI_CONFIG_MSIX_VECTOR \ + (offsetof(struct virtio_pci_common_cfg, config_msix_vector)) +#define VIO1_PCI_NUM_QUEUES \ + (offsetof(struct virtio_pci_common_cfg, num_queues)) +#define VIO1_PCI_DEVICE_STATUS \ + (offsetof(struct virtio_pci_common_cfg, device_status)) +#define VIO1_PCI_CONFIG_GENERATION \ + (offsetof(struct virtio_pci_common_cfg, config_generation)) +#define VIO1_PCI_QUEUE_SELECT \ + (offsetof(struct virtio_pci_common_cfg, queue_select)) +#define VIO1_PCI_QUEUE_SIZE \ + (offsetof(struct virtio_pci_common_cfg, queue_size)) +#define VIO1_PCI_QUEUE_MSIX_VECTOR \ + (offsetof(struct virtio_pci_common_cfg, queue_msix_vector)) +#define VIO1_PCI_QUEUE_ENABLE \ + (offsetof(struct virtio_pci_common_cfg, queue_enable)) +#define VIO1_PCI_QUEUE_NOTIFY_OFF \ + (offsetof(struct virtio_pci_common_cfg, queue_notify_off)) +#define VIO1_PCI_QUEUE_DESC \ + (offsetof(struct virtio_pci_common_cfg, queue_desc)) +#define VIO1_PCI_QUEUE_AVAIL \ + (offsetof(struct virtio_pci_common_cfg, queue_avail)) +#define VIO1_PCI_QUEUE_USED \ + (offsetof(struct virtio_pci_common_cfg, queue_used)) + +#define VIO1_CFG_BAR_OFFSET 0x000 +#define VIO1_NOTIFY_BAR_OFFSET 0x100 +#define VIO1_ISR_BAR_OFFSET 0x200 +#define VIO1_DEV_BAR_OFFSET 0x300 + /* Queue sizes must be power of two and less than IOV_MAX (1024). 
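+ *
+ * A power-of-two size lets the shared virtqueue code wrap ring indices
+ * with a mask instead of a modulo, e.g. (sketch, as in virtio_vq_init()
+ * and viornd_notifyq()):
+ *
+ *	vq_info->mask = vq_info->qs - 1;
+ *	aidx = avail->idx & vq_info->mask;
+ *	uidx = used->idx & vq_info->mask;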
*/ -#define VIORND_QUEUE_SIZE 64 -#define VIORND_QUEUE_MASK (VIORND_QUEUE_SIZE - 1) +#define VIRTIO_QUEUE_SIZE_MAX IOV_MAX +#define VIORND_QUEUE_SIZE_DEFAULT 64 +#define VIOBLK_QUEUE_SIZE_DEFAULT 128 +#define VIOSCSI_QUEUE_SIZE_DEFAULT 128 +#define VIONET_QUEUE_SIZE_DEFAULT 256 -#define VIOBLK_QUEUE_SIZE 128 -#define VIOBLK_QUEUE_MASK (VIOBLK_QUEUE_SIZE - 1) -#define VIOBLK_SEG_MAX (VIOBLK_QUEUE_SIZE - 2) +#define VIOBLK_SEG_MAX_DEFAULT (VIOBLK_QUEUE_SIZE_DEFAULT - 2) -#define VIOSCSI_QUEUE_SIZE 128 -#define VIOSCSI_QUEUE_MASK (VIOSCSI_QUEUE_SIZE - 1) - -#define VIONET_QUEUE_SIZE 256 -#define VIONET_QUEUE_MASK (VIONET_QUEUE_SIZE - 1) - /* Virtio network device is backed by tap(4), so inherit limits */ #define VIONET_HARD_MTU TUNMRU #define VIONET_MIN_TXLEN ETHER_HDR_LEN @@ -56,12 +91,15 @@ #define VMMCI_TIMEOUT_SHORT 3 #define VMMCI_TIMEOUT_LONG 120 -/* All the devices we support have either 1, 2 or 3 queues */ -/* viornd - 1 queue - * vioblk - 1 queue - * vionet - 2 queues - * vioscsi - 3 queues +/* + * All the devices we support have either 1, 2 or 3 queues. + * No devices currently support VIRTIO_*_F_MQ multi-queue features. */ +#define VIRTIO_RND_QUEUES 1 +#define VIRTIO_BLK_QUEUES 1 +#define VIRTIO_NET_QUEUES 2 +#define VIRTIO_SCSI_QUEUES 3 +#define VIRTIO_VMMCI_QUEUES 0 #define VIRTIO_MAX_QUEUES 3 #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ @@ -74,6 +112,14 @@ #define DESC_WRITABLE(/* struct vring_desc */ x) \ (((x)->flags & VRING_DESC_F_WRITE) ? 1 : 0) +struct virtio_pci_common_cap { + union { + struct pci_cap pci; + struct virtio_pci_cap virtio; + struct virtio_pci_notify_cap notify; + struct virtio_pci_cfg_cap cfg; + }; +} __packed; /* * VM <-> Device messaging. @@ -104,18 +150,15 @@ struct viodev_msg { } __packed; /* - * This struct stores notifications from a virtio driver. There is - * one such struct per virtio device. + * Legacy Virtio 0.9 register state. */ struct virtio_io_cfg { uint32_t device_feature; uint32_t guest_feature; uint32_t queue_pfn; - uint16_t queue_size; uint16_t queue_select; + uint16_t queue_size; uint16_t queue_notify; - uint8_t device_status; - uint8_t isr_status; }; struct virtio_backing { @@ -143,6 +186,9 @@ struct virtio_vq_info { /* Queue size: number of queue entries in virtq */ uint32_t qs; + /* Queue mask */ + uint32_t mask; + /* * The offset of the 'available' ring within the virtq located at * guest physical address qa above @@ -166,6 +212,8 @@ struct virtio_vq_info { * driver notified to the host. 
*/ uint16_t notified_avail; + + uint8_t vq_enabled; }; /* @@ -202,22 +250,10 @@ struct virtio_vq_acct { struct vring_used *used; }; -struct viornd_dev { - struct virtio_io_cfg cfg; - - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; - - uint8_t pci_id; - int irq; - uint32_t vm_id; -}; - struct vioblk_dev { - struct virtio_io_cfg cfg; - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; struct virtio_backing file; - int disk_fd[VM_MAX_BASE_PER_DISK]; /* fds for disk image(s) */ + uint8_t ndisk_fd; /* number of valid disk fds */ uint64_t capacity; /* size in 512 byte sectors */ uint32_t seg_max; /* maximum number of segments */ @@ -232,31 +268,16 @@ struct vioblk_dev { * 2 - requests */ struct vioscsi_dev { - struct virtio_io_cfg cfg; - - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; - struct virtio_backing file; - /* is the device locked */ - int locked; - /* size of iso file in bytes */ - uint64_t sz; - /* last block address read */ - uint64_t lba; - /* number of blocks represented in iso */ - uint64_t n_blocks; + int locked; /* is the device locked? */ + uint64_t sz; /* size of iso file in bytes */ + uint64_t lba; /* last block address read */ + uint64_t n_blocks; /* number of blocks represented in iso */ uint32_t max_xfer; - - uint8_t pci_id; - uint32_t vm_id; - int irq; }; struct vionet_dev { - struct virtio_io_cfg cfg; - struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; - int data_fd; /* fd for our tap device */ uint8_t mac[6]; @@ -269,28 +290,6 @@ struct vionet_dev { unsigned int idx; }; -struct virtio_dev { - union { - struct vioblk_dev vioblk; - struct vionet_dev vionet; - }; - - struct imsgev async_iev; - struct imsgev sync_iev; - - int sync_fd; /* fd for synchronous channel */ - int async_fd; /* fd for async channel */ - - uint8_t pci_id; - uint32_t vm_id; - uint32_t vm_vmid; - int irq; - - pid_t dev_pid; - char dev_type; - SLIST_ENTRY(virtio_dev) dev_next; -}; - struct virtio_net_hdr { uint8_t flags; uint8_t gso_type; @@ -298,13 +297,17 @@ struct virtio_net_hdr { uint16_t gso_size; uint16_t csum_start; uint16_t csum_offset; + uint16_t num_buffers; /* - * num_buffers is only used if VIRTIO_NET_F_MRG_RXBUF is negotiated. - * vmd(8) doesn't negotiate that, but the field is listed here - * for completeness sake. + * The following fields exist only if VIRTIO_NET_F_HASH_REPORT + * is negotiated. */ -/* uint16_t num_buffers; */ + /* + uint32_t hash_value; + uint16_t hash_report; + uint16_t padding_reserved; + */ }; enum vmmci_cmd { @@ -315,13 +318,9 @@ enum vmmci_cmd { }; struct vmmci_dev { - struct virtio_io_cfg cfg; struct event timeout; struct timeval time; enum vmmci_cmd cmd; - uint32_t vm_id; - int irq; - uint8_t pci_id; pthread_mutex_t mutex; struct vm_dev_pipe dev_pipe; @@ -334,8 +333,52 @@ struct ioinfo { off_t offset; }; +struct virtio_dev { + uint16_t device_id; /* Virtio device id [r] */ + union { + /* Multi-process enabled. */ + struct vioblk_dev vioblk; + struct vionet_dev vionet; + + /* In-process only. 
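+		 * (vmmci and vioscsi are emulated in the vm process
+		 * itself; vioblk and vionet are fork+exec'd into their
+		 * own device processes and reached over imsg.)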
*/ + struct vmmci_dev vmmci; + struct vioscsi_dev vioscsi; + }; + + struct virtio_io_cfg cfg; /* Virtio 0.9 */ + struct virtio_pci_common_cfg pci_cfg; /* Virtio 1.x */ + struct virtio_vq_info vq[VIRTIO_MAX_QUEUES]; /* Virtqueues */ + + uint16_t num_queues; /* number of virtqueues [r] */ + uint16_t queue_size; /* default queue size [r] */ + + uint8_t isr; /* isr status register [rw] */ + uint8_t status; /* device status register [rw] */ + uint64_t device_feature; /* device features [r] */ + uint64_t driver_feature; /* driver features [rw] */ + + uint8_t pci_id; /* pci device id [r] */ + uint32_t vm_id; /* vmm(4) vm identifier [r] */ + int irq; /* assigned irq [r] */ + + /* Multi-process emulation fields. */ + struct imsgev async_iev; /* async imsg event [r] */ + struct imsgev sync_iev; /* sync imsg event [r] */ + + int sync_fd; /* fd for synchronous channel */ + int async_fd; /* fd for async channel */ + + uint32_t vm_vmid; /* vmd(8) vm identifier [r] */ + pid_t dev_pid; /* pid of emulator process */ + char dev_type; /* device type (as char) */ + SLIST_ENTRY(virtio_dev) dev_next; +}; + /* virtio.c */ +extern struct virtio_dev vmmci; + void virtio_init(struct vmd_vm *, int, int[][VM_MAX_BASE_PER_DISK], int *); +void virtio_vq_init(struct virtio_dev *, size_t); void virtio_broadcast_imsg(struct vmd_vm *, uint16_t, void *, uint16_t); void virtio_stop(struct vmd_vm *); void virtio_start(struct vmd_vm *); @@ -347,11 +390,10 @@ int vm_device_pipe(struct virtio_dev *, void (*)(int, int virtio_pci_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); void virtio_assert_irq(struct virtio_dev *, int); void virtio_deassert_irq(struct virtio_dev *, int); +uint32_t virtio_io_cfg(struct virtio_dev *, int, uint8_t, uint32_t, uint8_t); -int virtio_rnd_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); -void viornd_update_qs(void); -void viornd_update_qa(void); -int viornd_notifyq(void); +void virtio_update_qs(struct virtio_dev *); +void virtio_update_qa(struct virtio_dev *); ssize_t virtio_qcow2_get_base(int, char *, size_t, const char *); int virtio_qcow2_create(const char *, const char *, uint64_t); @@ -362,8 +404,7 @@ int virtio_raw_init(struct virtio_backing *, off_t *, void vionet_set_hostmac(struct vmd_vm *, unsigned int, uint8_t *); int vmmci_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); -int vmmci_ctl(unsigned int); -void vmmci_ack(unsigned int); +int vmmci_ctl(struct virtio_dev *, unsigned int); void vmmci_timeout(int, short, void *); const char *vioblk_cmd_name(uint32_t); @@ -373,9 +414,7 @@ ssize_t dhcp_request(struct virtio_dev *, char *, size /* vioscsi.c */ int vioscsi_io(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t); -void vioscsi_update_qs(struct vioscsi_dev *); -void vioscsi_update_qa(struct vioscsi_dev *); -int vioscsi_notifyq(struct vioscsi_dev *); +int vioscsi_notifyq(struct virtio_dev *, uint16_t); /* imsg handling */ void viodev_msg_read(struct imsg *, struct viodev_msg *); blob - 96958e1f611bc01f1ed9224a54d7a0b1399bae3e blob + 7908417f86e7210f8cd41e6bd96b17b812990252 --- usr.sbin/vmd/vm.c +++ usr.sbin/vmd/vm.c @@ -356,11 +356,11 @@ vm_dispatch_vmm(int fd, short event, void *arg) sizeof(verbose)); break; case IMSG_VMDOP_VM_SHUTDOWN: - if (vmmci_ctl(VMMCI_SHUTDOWN) == -1) + if (vmmci_ctl(&vmmci, VMMCI_SHUTDOWN) == -1) _exit(0); break; case IMSG_VMDOP_VM_REBOOT: - if (vmmci_ctl(VMMCI_REBOOT) == -1) + if (vmmci_ctl(&vmmci, VMMCI_REBOOT) == -1) _exit(0); break; case IMSG_VMDOP_PAUSE_VM: blob - a521d1358fffb4c23170c86931859231b35b5481 blob + 
119d411f279311a87efae595a21748b31fbec853 --- usr.sbin/vmd/vmd.h +++ usr.sbin/vmd/vmd.h @@ -43,6 +43,9 @@ #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#define CTASSERT(x) extern char _ctassert[(x) ? 1 : -1 ] \ + __attribute__((__unused__)) + #define MB(x) (x * 1024UL * 1024UL) #define GB(x) (x * 1024UL * 1024UL * 1024UL)
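One last note on the new CTASSERT macro: when its condition is false it
declares an array of negative size, failing the build, so layout
regressions become compile-time errors. A hypothetical use:

	CTASSERT(sizeof(struct vring_desc) == 16);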