nrelease - fix/improve livecd
[dragonfly.git] / sys / dev / virtual / amazon / ena / ena.c
1 /*-
2  * BSD LICENSE
3  *
4  * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $FreeBSD: head/sys/dev/ena/ena.c 325593 2017-11-09 13:38:17Z mw $
31  */
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rman.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47 #include <sys/time.h>
48 #include <sys/eventhandler.h>
49
50 #include <net/bpf.h>
51 #include <net/if.h>
52 #include <net/if_var.h>
53 #include <net/if_arp.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56 #include <net/if_types.h>
57 #include <net/ifq_var.h>
58 #include <net/vlan/if_vlan_var.h>
59
60 #include <netinet/in_systm.h>
61 #include <netinet/in.h>
62 #include <netinet/if_ether.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip6.h>
65 #include <netinet/tcp.h>
66 #include <netinet/udp.h>
67
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70
71 #include "ena.h"
72 #include "ena_sysctl.h"
73
74 /*********************************************************
75  *  Function prototypes
76  *********************************************************/
77 static int      ena_probe(device_t);
78 static void     ena_intr_msix_mgmnt(void *);
79 static int      ena_allocate_pci_resources(struct ena_adapter*);
80 static void     ena_free_pci_resources(struct ena_adapter *);
81 static int      ena_change_mtu(if_t, int);
82 #if 0 /* XXX swildner counters */
83 static inline void ena_alloc_counters(counter_u64_t *, int);
84 static inline void ena_free_counters(counter_u64_t *, int);
85 static inline void ena_reset_counters(counter_u64_t *, int);
86 #endif
87 static void     ena_init_io_rings_common(struct ena_adapter *,
88     struct ena_ring *, uint16_t);
89 static void     ena_init_io_rings(struct ena_adapter *);
90 static void     ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
91 static void     ena_free_all_io_rings_resources(struct ena_adapter *);
92 static int      ena_setup_tx_dma_tag(struct ena_adapter *);
93 static int      ena_free_tx_dma_tag(struct ena_adapter *);
94 static int      ena_setup_rx_dma_tag(struct ena_adapter *);
95 static int      ena_free_rx_dma_tag(struct ena_adapter *);
96 static int      ena_setup_tx_resources(struct ena_adapter *, int);
97 static void     ena_free_tx_resources(struct ena_adapter *, int);
98 static int      ena_setup_all_tx_resources(struct ena_adapter *);
99 static void     ena_free_all_tx_resources(struct ena_adapter *);
100 static inline int validate_rx_req_id(struct ena_ring *, uint16_t);
101 static int      ena_setup_rx_resources(struct ena_adapter *, unsigned int);
102 static void     ena_free_rx_resources(struct ena_adapter *, unsigned int);
103 static int      ena_setup_all_rx_resources(struct ena_adapter *);
104 static void     ena_free_all_rx_resources(struct ena_adapter *);
105 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
106     struct ena_rx_buffer *);
107 static void     ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
108     struct ena_rx_buffer *);
109 static int      ena_refill_rx_bufs(struct ena_ring *, uint32_t);
110 static void     ena_free_rx_bufs(struct ena_adapter *, unsigned int);
111 static void     ena_refill_all_rx_bufs(struct ena_adapter *);
112 static void     ena_free_all_rx_bufs(struct ena_adapter *);
113 static void     ena_free_tx_bufs(struct ena_adapter *, unsigned int);
114 static void     ena_free_all_tx_bufs(struct ena_adapter *);
115 static void     ena_destroy_all_tx_queues(struct ena_adapter *);
116 static void     ena_destroy_all_rx_queues(struct ena_adapter *);
117 static void     ena_destroy_all_io_queues(struct ena_adapter *);
118 static int      ena_create_io_queues(struct ena_adapter *);
119 static int      ena_tx_cleanup(struct ena_ring *);
120 static void     ena_deferred_rx_cleanup(void *, int);
121 static int      ena_rx_cleanup(struct ena_ring *);
122 static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
123 static void     ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
124     struct mbuf *);
125 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
126     struct ena_com_rx_ctx *, uint16_t *);
127 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
128     struct mbuf *);
129 static void     ena_handle_msix(void *);
130 static int      ena_enable_msix(struct ena_adapter *);
131 static void     ena_setup_mgmnt_intr(struct ena_adapter *);
132 static void     ena_setup_io_intr(struct ena_adapter *);
133 static int      ena_request_mgmnt_irq(struct ena_adapter *);
134 static int      ena_request_io_irq(struct ena_adapter *);
135 static void     ena_free_mgmnt_irq(struct ena_adapter *);
136 static void     ena_free_io_irq(struct ena_adapter *);
137 static void     ena_free_irqs(struct ena_adapter*);
138 static void     ena_disable_msix(struct ena_adapter *);
139 static void     ena_unmask_all_io_irqs(struct ena_adapter *);
140 static int      ena_rss_configure(struct ena_adapter *);
141 static int      ena_up_complete(struct ena_adapter *);
142 static int      ena_up(struct ena_adapter *);
143 static void     ena_down(struct ena_adapter *);
144 #if 0 /* XXX swildner counters */
145 static uint64_t ena_get_counter(if_t, ift_counter);
146 #endif
147 static int      ena_media_change(if_t);
148 static void     ena_media_status(if_t, struct ifmediareq *);
149 static void     ena_init(void *);
150 static int      ena_ioctl(if_t, u_long, caddr_t, struct ucred *);
151 static int      ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
152 static void     ena_update_host_info(struct ena_admin_host_info *, if_t);
153 static void     ena_update_hwassist(struct ena_adapter *);
154 static int      ena_setup_ifnet(device_t, struct ena_adapter *,
155     struct ena_com_dev_get_features_ctx *);
156 static void     ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
157 static int      ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
158     struct mbuf **mbuf);
159 static int      ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
160 static void     ena_start_xmit(struct ifnet *, struct ifaltq_subque *);
161 static int      ena_calc_io_queue_num(struct ena_adapter *,
162     struct ena_com_dev_get_features_ctx *);
163 static int      ena_calc_queue_size(struct ena_adapter *, uint16_t *,
164     uint16_t *, struct ena_com_dev_get_features_ctx *);
165 static int      ena_rss_init_default(struct ena_adapter *);
166 static void     ena_rss_init_default_deferred(void *);
167 static void     ena_config_host_info(struct ena_com_dev *);
168 static int      ena_attach(device_t);
169 static int      ena_detach(device_t);
170 static int      ena_device_init(struct ena_adapter *, device_t,
171     struct ena_com_dev_get_features_ctx *, int *);
172 static int      ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
173     int);
174 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
175 static void     unimplemented_aenq_handler(void *,
176     struct ena_admin_aenq_entry *);
177 static void     ena_timer_service(void *);
178
179 static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
180
181 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters");
182
183 /*
184  * Logging level for changing verbosity of the output
185  */
186 int ena_log_level = ENA_ALERT | ENA_WARNING;
187 TUNABLE_INT("hw.ena.ena_log_level", &ena_log_level);
188 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RW,
189     &ena_log_level, 0, "Logging level indicating verbosity of the logs");
190
191 static ena_vendor_info_t ena_vendor_info_array[] = {
192     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
193     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
194     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
195     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
196     /* Last entry */
197     { 0, 0, 0 }
198 };
199
200 /*
201  * Contains pointers to event handlers, e.g. link state change.
202  */
203 static struct ena_aenq_handlers aenq_handlers;
204
205 void
206 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
207 {
208         if (error != 0)
209                 return;
210         *(bus_addr_t *) arg = segs[0].ds_addr;
211 }
212
213 int
214 ena_dma_alloc(device_t dmadev, bus_size_t size,
215     ena_mem_handle_t *dma , int mapflags)
216 {
217         struct ena_adapter* adapter = device_get_softc(dmadev);
218         uint32_t maxsize;
219         uint64_t dma_space_addr;
220         int error;
221
222         maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
223
224         dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
225         if (unlikely(dma_space_addr == 0))
226                 dma_space_addr = BUS_SPACE_MAXADDR;
227
228         error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
229             8, 0,             /* alignment, bounds              */
230             dma_space_addr,   /* lowaddr of exclusion window    */
231             BUS_SPACE_MAXADDR,/* highaddr of exclusion window   */
232             maxsize,          /* maxsize                        */
233             1,                /* nsegments                      */
234             maxsize,          /* maxsegsize                     */
235             BUS_DMA_ALLOCNOW, /* flags                          */
236             &dma->tag);
237         if (unlikely(error != 0)) {
238                 ena_trace(ENA_ALERT, "bus_dma_tag_create failed: %d\n", error);
239                 goto fail_tag;
240         }
241
242         error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
243             BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
244         if (unlikely(error != 0)) {
245                 ena_trace(ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
246                     (uintmax_t)size, error);
247                 goto fail_map_create;
248         }
249
250         dma->paddr = 0;
251         error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
252             size, ena_dmamap_callback, &dma->paddr, mapflags);
253         if (unlikely((error != 0) || (dma->paddr == 0))) {
254                 ena_trace(ENA_ALERT, ": bus_dmamap_load failed: %d\n", error);
255                 goto fail_map_load;
256         }
257
258         return (0);
259
260 fail_map_load:
261         bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
262 fail_map_create:
263         bus_dma_tag_destroy(dma->tag);
264 fail_tag:
265         dma->tag = NULL;
266
267         return (error);
268 }
269
270 static int
271 ena_allocate_pci_resources(struct ena_adapter* adapter)
272 {
273         device_t pdev = adapter->pdev;
274         int rid;
275
276         rid = PCIR_BAR(ENA_REG_BAR);
277         adapter->memory = NULL;
278         adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
279             &rid, RF_ACTIVE);
280         if (unlikely(adapter->registers == NULL)) {
281                 device_printf(pdev, "Unable to allocate bus resource: "
282                     "registers\n");
283                 return (ENXIO);
284         }
285
286         return (0);
287 }
288
289 static void
290 ena_free_pci_resources(struct ena_adapter *adapter)
291 {
292         device_t pdev = adapter->pdev;
293
294         if (adapter->memory != NULL) {
295                 bus_release_resource(pdev, SYS_RES_MEMORY,
296                     PCIR_BAR(ENA_MEM_BAR), adapter->memory);
297         }
298
299         if (adapter->registers != NULL) {
300                 bus_release_resource(pdev, SYS_RES_MEMORY,
301                     PCIR_BAR(ENA_REG_BAR), adapter->registers);
302         }
303 }
304
305 static int
306 ena_probe(device_t dev)
307 {
308         ena_vendor_info_t *ent;
309         char            adapter_name[60];
310         uint16_t        pci_vendor_id = 0;
311         uint16_t        pci_device_id = 0;
312
313         pci_vendor_id = pci_get_vendor(dev);
314         pci_device_id = pci_get_device(dev);
315
316         ent = ena_vendor_info_array;
317         while (ent->vendor_id != 0) {
318                 if ((pci_vendor_id == ent->vendor_id) &&
319                     (pci_device_id == ent->device_id)) {
320                         ena_trace(ENA_DBG, "vendor=%x device=%x ",
321                             pci_vendor_id, pci_device_id);
322
323                         ksprintf(adapter_name, DEVICE_DESC);
324                         device_set_desc_copy(dev, adapter_name);
325                         return (BUS_PROBE_DEFAULT);
326                 }
327
328                 ent++;
329
330         }
331
332         return (ENXIO);
333 }
334
335 static int
336 ena_change_mtu(if_t ifp, int new_mtu)
337 {
338         struct ena_adapter *adapter = ifp->if_softc;
339         int rc;
340
341         if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
342                 device_printf(adapter->pdev, "Invalid MTU setting. "
343                     "new_mtu: %d max mtu: %d min mtu: %d\n",
344                     new_mtu, adapter->max_mtu, ENA_MIN_MTU);
345                 return (EINVAL);
346         }
347
348         rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
349         if (likely(rc == 0)) {
350                 ena_trace(ENA_DBG, "set MTU to %d\n", new_mtu);
351                 ifp->if_mtu = new_mtu;
352         } else {
353                 device_printf(adapter->pdev, "Failed to set MTU to %d\n",
354                     new_mtu);
355         }
356
357         return (rc);
358 }
359
#if 0 /* XXX swildner counters */
/*
 * The three helpers below treat [begin, begin + size bytes) as an array of
 * counter_u64_t and apply one operation to every element. They are
 * currently compiled out.
 */

/* Allocate every counter in the range with M_WAITOK. */
static inline void
ena_alloc_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *limit = (counter_u64_t *)((char *)begin + size);

	while (begin < limit) {
		*begin = counter_u64_alloc(M_WAITOK);
		begin++;
	}
}

/* Free every counter in the range. */
static inline void
ena_free_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *limit = (counter_u64_t *)((char *)begin + size);

	while (begin < limit) {
		counter_u64_free(*begin);
		begin++;
	}
}

/* Zero every counter in the range. */
static inline void
ena_reset_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *limit = (counter_u64_t *)((char *)begin + size);

	while (begin < limit) {
		counter_u64_zero(*begin);
		begin++;
	}
}
#endif
388
389 static void
390 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
391     uint16_t qid)
392 {
393
394         ring->qid = qid;
395         ring->adapter = adapter;
396         ring->ena_dev = adapter->ena_dev;
397 }
398
399 static void
400 ena_init_io_rings(struct ena_adapter *adapter)
401 {
402         struct ena_com_dev *ena_dev;
403         struct ena_ring *txr, *rxr;
404         struct ena_que *que;
405         int i;
406
407         ena_dev = adapter->ena_dev;
408
409         for (i = 0; i < adapter->num_queues; i++) {
410                 txr = &adapter->tx_ring[i];
411                 rxr = &adapter->rx_ring[i];
412
413                 /* TX/RX common ring state */
414                 ena_init_io_rings_common(adapter, txr, i);
415                 ena_init_io_rings_common(adapter, rxr, i);
416
417                 /* TX specific ring state */
418                 txr->ring_size = adapter->tx_ring_size;
419                 txr->tx_max_header_size = ena_dev->tx_max_header_size;
420                 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
421                 txr->smoothed_interval =
422                     ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
423
424 #if 0 /* XXX swildner counters */
425                 /* Alloc TX statistics. */
426                 ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
427                     sizeof(txr->tx_stats));
428 #endif
429
430                 /* RX specific ring state */
431                 rxr->ring_size = adapter->rx_ring_size;
432                 rxr->smoothed_interval =
433                     ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
434
435 #if 0 /* XXX swildner counters */
436                 /* Alloc RX statistics. */
437                 ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
438                     sizeof(rxr->rx_stats));
439 #endif
440
441                 /* Initialize locks */
442                 ksnprintf(txr->lock_name, nitems(txr->lock_name), "%s:tx(%d)",
443                     device_get_nameunit(adapter->pdev), i);
444                 ksnprintf(rxr->lock_name, nitems(rxr->lock_name), "%s:rx(%d)",
445                     device_get_nameunit(adapter->pdev), i);
446
447                 lockinit(&txr->ring_lock, txr->lock_name, 0, LK_CANRECURSE);
448                 lockinit(&rxr->ring_lock, rxr->lock_name, 0, LK_CANRECURSE);
449
450                 que = &adapter->que[i];
451                 que->adapter = adapter;
452                 que->id = i;
453                 que->tx_ring = txr;
454                 que->rx_ring = rxr;
455
456                 txr->que = que;
457                 rxr->que = que;
458
459                 rxr->empty_rx_queue = 0;
460         }
461 }
462
463 static void
464 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
465 {
466         struct ena_ring *txr = &adapter->tx_ring[qid];
467         struct ena_ring *rxr = &adapter->rx_ring[qid];
468
469 #if 0 /* XXX swildner counters */
470         ena_free_counters((counter_u64_t *)&txr->tx_stats,
471             sizeof(txr->tx_stats));
472         ena_free_counters((counter_u64_t *)&rxr->rx_stats,
473             sizeof(rxr->rx_stats));
474 #endif
475
476         lockuninit(&txr->ring_lock);
477         lockuninit(&rxr->ring_lock);
478 }
479
480 static void
481 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
482 {
483         int i;
484
485         for (i = 0; i < adapter->num_queues; i++)
486                 ena_free_io_ring_resources(adapter, i);
487
488 }
489
490 static int
491 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
492 {
493         int ret;
494
495         /* Create DMA tag for Tx buffers */
496         ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
497             1, 0,                                 /* alignment, bounds       */
498             ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
499             BUS_SPACE_MAXADDR,                    /* highaddr of excl window */
500             ENA_TSO_MAXSIZE,                      /* maxsize                 */
501             ENA_BUS_DMA_SEGS,                     /* nsegments               */
502             ENA_TSO_MAXSIZE,                      /* maxsegsize              */
503             0,                                    /* flags                   */
504             &adapter->tx_buf_tag);
505
506         return (ret);
507 }
508
509 static int
510 ena_free_tx_dma_tag(struct ena_adapter *adapter)
511 {
512         int ret;
513
514         ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
515
516         if (likely(ret == 0))
517                 adapter->tx_buf_tag = NULL;
518
519         return (ret);
520 }
521
522 static int
523 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
524 {
525         int ret;
526
527         /* Create DMA tag for Rx buffers*/
528         ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
529             1, 0,                                 /* alignment, bounds       */
530             ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
531             BUS_SPACE_MAXADDR,                    /* highaddr of excl window */
532             MJUM16BYTES,                          /* maxsize                 */
533             adapter->max_rx_sgl_size,             /* nsegments               */
534             MJUM16BYTES,                          /* maxsegsize              */
535             0,                                    /* flags                   */
536             &adapter->rx_buf_tag);
537
538         return (ret);
539 }
540
541 static int
542 ena_free_rx_dma_tag(struct ena_adapter *adapter)
543 {
544         int ret;
545
546         ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
547
548         if (likely(ret == 0))
549                 adapter->rx_buf_tag = NULL;
550
551         return (ret);
552 }
553
554 /**
555  * ena_setup_tx_resources - allocate Tx resources (Descriptors)
556  * @adapter: network interface device structure
557  * @qid: queue index
558  *
559  * Returns 0 on success, otherwise on failure.
560  **/
561 static int
562 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
563 {
564         struct ena_que *que = &adapter->que[qid];
565         struct ena_ring *tx_ring = que->tx_ring;
566         int size, i, err;
567 #ifdef  RSS
568         cpuset_t cpu_mask;
569 #endif
570
571         size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
572
573         tx_ring->tx_buffer_info = kmalloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
574         if (unlikely(tx_ring->tx_buffer_info == NULL))
575                 return (ENOMEM);
576
577         size = sizeof(uint16_t) * tx_ring->ring_size;
578         tx_ring->free_tx_ids = kmalloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
579         if (unlikely(tx_ring->free_tx_ids == NULL))
580                 goto err_buf_info_free;
581
582         /* Req id stack for TX OOO completions */
583         for (i = 0; i < tx_ring->ring_size; i++)
584                 tx_ring->free_tx_ids[i] = i;
585
586 #if 0 /* XXX swildner counters */
587         /* Reset TX statistics. */
588         ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
589             sizeof(tx_ring->tx_stats));
590 #endif
591
592         tx_ring->next_to_use = 0;
593         tx_ring->next_to_clean = 0;
594
595         /* ... and create the buffer DMA maps */
596         for (i = 0; i < tx_ring->ring_size; i++) {
597                 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
598                     &tx_ring->tx_buffer_info[i].map);
599                 if (unlikely(err != 0)) {
600                         ena_trace(ENA_ALERT,
601                              "Unable to create Tx DMA map for buffer %d\n", i);
602                         goto err_buf_info_unmap;
603                 }
604         }
605
606         return (0);
607
608 err_buf_info_unmap:
609         while (i--) {
610                 bus_dmamap_destroy(adapter->tx_buf_tag,
611                     tx_ring->tx_buffer_info[i].map);
612         }
613         kfree(tx_ring->free_tx_ids, M_DEVBUF);
614         tx_ring->free_tx_ids = NULL;
615 err_buf_info_free:
616         kfree(tx_ring->tx_buffer_info, M_DEVBUF);
617         tx_ring->tx_buffer_info = NULL;
618
619         return (ENOMEM);
620 }
621
622 /**
623  * ena_free_tx_resources - Free Tx Resources per Queue
624  * @adapter: network interface device structure
625  * @qid: queue index
626  *
627  * Free all transmit software resources
628  **/
629 static void
630 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
631 {
632         struct ena_ring *tx_ring = &adapter->tx_ring[qid];
633
634         ENA_RING_MTX_LOCK(tx_ring);
635
636         /* Free buffer DMA maps, */
637         for (int i = 0; i < tx_ring->ring_size; i++) {
638                 m_freem(tx_ring->tx_buffer_info[i].mbuf);
639                 tx_ring->tx_buffer_info[i].mbuf = NULL;
640                 bus_dmamap_unload(adapter->tx_buf_tag,
641                     tx_ring->tx_buffer_info[i].map);
642                 bus_dmamap_destroy(adapter->tx_buf_tag,
643                     tx_ring->tx_buffer_info[i].map);
644         }
645         ENA_RING_MTX_UNLOCK(tx_ring);
646
647         /* And free allocated memory. */
648         kfree(tx_ring->tx_buffer_info, M_DEVBUF);
649         tx_ring->tx_buffer_info = NULL;
650
651         kfree(tx_ring->free_tx_ids, M_DEVBUF);
652         tx_ring->free_tx_ids = NULL;
653 }
654
655 /**
656  * ena_setup_all_tx_resources - allocate all queues Tx resources
657  * @adapter: network interface device structure
658  *
659  * Returns 0 on success, otherwise on failure.
660  **/
661 static int
662 ena_setup_all_tx_resources(struct ena_adapter *adapter)
663 {
664         int i, rc;
665
666         for (i = 0; i < adapter->num_queues; i++) {
667                 rc = ena_setup_tx_resources(adapter, i);
668                 if (rc != 0) {
669                         device_printf(adapter->pdev,
670                             "Allocation for Tx Queue %u failed\n", i);
671                         goto err_setup_tx;
672                 }
673         }
674
675         return (0);
676
677 err_setup_tx:
678         /* Rewind the index freeing the rings as we go */
679         while (i--)
680                 ena_free_tx_resources(adapter, i);
681         return (rc);
682 }
683
684 /**
685  * ena_free_all_tx_resources - Free Tx Resources for All Queues
686  * @adapter: network interface device structure
687  *
688  * Free all transmit software resources
689  **/
690 static void
691 ena_free_all_tx_resources(struct ena_adapter *adapter)
692 {
693         int i;
694
695         for (i = 0; i < adapter->num_queues; i++)
696                 ena_free_tx_resources(adapter, i);
697 }
698
699 static inline int
700 validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
701 {
702         if (likely(req_id < rx_ring->ring_size))
703                 return (0);
704
705         device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
706             req_id);
707         IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
708 #if 0 /* XXX swildner counters */
709         counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
710 #endif
711
712         /* Trigger device reset */
713         rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
714         rx_ring->adapter->trigger_reset = true;
715
716         return (EFAULT);
717 }
718
719 /**
720  * ena_setup_rx_resources - allocate Rx resources (Descriptors)
721  * @adapter: network interface device structure
722  * @qid: queue index
723  *
724  * Returns 0 on success, otherwise on failure.
725  **/
726 static int
727 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
728 {
729         struct ena_que *que = &adapter->que[qid];
730         struct ena_ring *rx_ring = que->rx_ring;
731         int size, err, i;
732 #ifdef  RSS
733         cpuset_t cpu_mask;
734 #endif
735
736         size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
737
738         /*
739          * Alloc extra element so in rx path
740          * we can always prefetch rx_info + 1
741          */
742         size += sizeof(struct ena_rx_buffer);
743
744         rx_ring->rx_buffer_info = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
745
746         size = sizeof(uint16_t) * rx_ring->ring_size;
747         rx_ring->free_rx_ids = kmalloc(size, M_DEVBUF, M_WAITOK);
748
749         for (i = 0; i < rx_ring->ring_size; i++)
750                 rx_ring->free_rx_ids[i] = i;
751
752 #if 0 /* XXX swildner counters */
753         /* Reset RX statistics. */
754         ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
755             sizeof(rx_ring->rx_stats));
756 #endif
757
758         rx_ring->next_to_clean = 0;
759         rx_ring->next_to_use = 0;
760
761         /* ... and create the buffer DMA maps */
762         for (i = 0; i < rx_ring->ring_size; i++) {
763                 err = bus_dmamap_create(adapter->rx_buf_tag, 0,
764                     &(rx_ring->rx_buffer_info[i].map));
765                 if (err != 0) {
766                         ena_trace(ENA_ALERT,
767                             "Unable to create Rx DMA map for buffer %d\n", i);
768                         goto err_buf_info_unmap;
769                 }
770         }
771
772 #if 0 /* XXX LRO */
773         /* Create LRO for the ring */
774         if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
775                 int err = tcp_lro_init(&rx_ring->lro);
776                 if (err != 0) {
777                         device_printf(adapter->pdev,
778                             "LRO[%d] Initialization failed!\n", qid);
779                 } else {
780                         ena_trace(ENA_INFO,
781                             "RX Soft LRO[%d] Initialized\n", qid);
782                         rx_ring->lro.ifp = adapter->ifp;
783                 }
784         }
785 #endif
786
787         /* Allocate taskqueues */
788         TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring);
789         rx_ring->cmpl_tq = taskqueue_create("ena RX completion", M_WAITOK,
790             taskqueue_thread_enqueue, &rx_ring->cmpl_tq);
791
792         /* RSS set cpu for thread */
793 #ifdef RSS
794         CPU_SETOF(que->cpu, &cpu_mask);
795         taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, PI_NET, &cpu_mask,
796             "%s rx_ring cmpl (bucket %d)",
797             device_get_nameunit(adapter->pdev), que->cpu);
798 #else
799         taskqueue_start_threads(&rx_ring->cmpl_tq, 1, TDPRI_KERN_DAEMON, -1,
800             "%s rx_ring cmpl", device_get_nameunit(adapter->pdev));
801 #endif
802
803         return (0);
804
805 err_buf_info_unmap:
806         while (i--) {
807                 bus_dmamap_destroy(adapter->rx_buf_tag,
808                     rx_ring->rx_buffer_info[i].map);
809         }
810
811         kfree(rx_ring->free_rx_ids, M_DEVBUF);
812         rx_ring->free_rx_ids = NULL;
813         kfree(rx_ring->rx_buffer_info, M_DEVBUF);
814         rx_ring->rx_buffer_info = NULL;
815         return (ENOMEM);
816 }
817
818 /**
819  * ena_free_rx_resources - Free Rx Resources
820  * @adapter: network interface device structure
821  * @qid: queue index
822  *
823  * Free all receive software resources
824  **/
825 static void
826 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
827 {
828         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
829
830         while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0)
831                 taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task);
832
833         taskqueue_free(rx_ring->cmpl_tq);
834
835         /* Free buffer DMA maps, */
836         for (int i = 0; i < rx_ring->ring_size; i++) {
837                 m_freem(rx_ring->rx_buffer_info[i].mbuf);
838                 rx_ring->rx_buffer_info[i].mbuf = NULL;
839                 bus_dmamap_unload(adapter->rx_buf_tag,
840                     rx_ring->rx_buffer_info[i].map);
841                 bus_dmamap_destroy(adapter->rx_buf_tag,
842                     rx_ring->rx_buffer_info[i].map);
843         }
844
845 #if 0 /* XXX LRO */
846         /* free LRO resources, */
847         tcp_lro_free(&rx_ring->lro);
848 #endif
849
850         /* free allocated memory */
851         kfree(rx_ring->rx_buffer_info, M_DEVBUF);
852         rx_ring->rx_buffer_info = NULL;
853
854         kfree(rx_ring->free_rx_ids, M_DEVBUF);
855         rx_ring->free_rx_ids = NULL;
856 }
857
858 /**
859  * ena_setup_all_rx_resources - allocate all queues Rx resources
860  * @adapter: network interface device structure
861  *
862  * Returns 0 on success, otherwise on failure.
863  **/
864 static int
865 ena_setup_all_rx_resources(struct ena_adapter *adapter)
866 {
867         int i, rc = 0;
868
869         for (i = 0; i < adapter->num_queues; i++) {
870                 rc = ena_setup_rx_resources(adapter, i);
871                 if (rc != 0) {
872                         device_printf(adapter->pdev,
873                             "Allocation for Rx Queue %u failed\n", i);
874                         goto err_setup_rx;
875                 }
876         }
877         return (0);
878
879 err_setup_rx:
880         /* rewind the index freeing the rings as we go */
881         while (i--)
882                 ena_free_rx_resources(adapter, i);
883         return (rc);
884 }
885
886 /**
887  * ena_free_all_rx_resources - Free Rx resources for all queues
888  * @adapter: network interface device structure
889  *
890  * Free all receive software resources
891  **/
892 static void
893 ena_free_all_rx_resources(struct ena_adapter *adapter)
894 {
895         int i;
896
897         for (i = 0; i < adapter->num_queues; i++)
898                 ena_free_rx_resources(adapter, i);
899 }
900
901 static inline int
902 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
903     struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
904 {
905         struct ena_com_buf *ena_buf;
906         bus_dma_segment_t segs[1];
907         int nsegs, error;
908         int mlen;
909
910         /* if previous allocated frag is not used */
911         if (unlikely(rx_info->mbuf != NULL))
912                 return (0);
913
914         /* Get mbuf using UMA allocator */
915         rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
916
917         if (unlikely(rx_info->mbuf == NULL)) {
918 #if 0 /* XXX swildner counters */
919                 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
920 #endif
921                 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
922                 if (unlikely(rx_info->mbuf == NULL)) {
923                         IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
924 #if 0 /* XXX swildner counters */
925                         counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
926 #endif
927                         return (ENOMEM);
928                 }
929                 mlen = MCLBYTES;
930         } else {
931                 mlen = MJUMPAGESIZE;
932         }
933         /* Set mbuf length*/
934         rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
935
936         /* Map packets for DMA */
937         ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
938             "Using tag %p for buffers' DMA mapping, mbuf %p len: %d",
939             adapter->rx_buf_tag,rx_info->mbuf, rx_info->mbuf->m_len);
940         error = bus_dmamap_load_mbuf_segment(adapter->rx_buf_tag, rx_info->map,
941             rx_info->mbuf, segs, 1, &nsegs, BUS_DMA_NOWAIT);
942         if (unlikely((error != 0) || (nsegs != 1))) {
943                 ena_trace(ENA_WARNING, "failed to map mbuf, error: %d, "
944                     "nsegs: %d\n", error, nsegs);
945                 IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
946 #if 0 /* XXX swildner counters */
947                 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
948 #endif
949                 goto exit;
950
951         }
952
953         bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
954
955         ena_buf = &rx_info->ena_buf;
956         ena_buf->paddr = segs[0].ds_addr;
957         ena_buf->len = mlen;
958
959         ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
960             "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
961             rx_info->mbuf, rx_info,ena_buf->len, (uintmax_t)ena_buf->paddr);
962
963         return (0);
964
965 exit:
966         m_freem(rx_info->mbuf);
967         rx_info->mbuf = NULL;
968         return (EFAULT);
969 }
970
971 static void
972 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
973     struct ena_rx_buffer *rx_info)
974 {
975
976         if (rx_info->mbuf == NULL) {
977                 ena_trace(ENA_WARNING, "Trying to free unallocated buffer\n");
978                 return;
979         }
980
981         bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
982         m_freem(rx_info->mbuf);
983         rx_info->mbuf = NULL;
984 }
985
986 /**
987  * ena_refill_rx_bufs - Refills ring with descriptors
988  * @rx_ring: the ring which we want to feed with free descriptors
989  * @num: number of descriptors to refill
990  * Refills the ring with newly allocated DMA-mapped mbufs for receiving
991  **/
992 static int
993 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
994 {
995         struct ena_adapter *adapter = rx_ring->adapter;
996         uint16_t next_to_use, req_id;
997         uint32_t i;
998         int rc;
999
1000         ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d",
1001             rx_ring->qid);
1002
1003         next_to_use = rx_ring->next_to_use;
1004
1005         for (i = 0; i < num; i++) {
1006                 struct ena_rx_buffer *rx_info;
1007
1008                 ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
1009                     "RX buffer - next to use: %d", next_to_use);
1010
1011                 req_id = rx_ring->free_rx_ids[next_to_use];
1012                 rc = validate_rx_req_id(rx_ring, req_id);
1013                 if (unlikely(rc != 0))
1014                         break;
1015
1016                 rx_info = &rx_ring->rx_buffer_info[req_id];
1017
1018                 rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1019                 if (unlikely(rc != 0)) {
1020                         ena_trace(ENA_WARNING,
1021                             "failed to alloc buffer for rx queue %d\n",
1022                             rx_ring->qid);
1023                         break;
1024                 }
1025                 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1026                     &rx_info->ena_buf, req_id);
1027                 if (unlikely(rc != 0)) {
1028                         ena_trace(ENA_WARNING,
1029                             "failed to add buffer for rx queue %d\n",
1030                             rx_ring->qid);
1031                         break;
1032                 }
1033                 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1034                     rx_ring->ring_size);
1035         }
1036
1037         if (unlikely(i < num)) {
1038                 IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
1039 #if 0 /* XXX swildner counters */
1040                 counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1041 #endif
1042                 ena_trace(ENA_WARNING,
1043                      "refilled rx qid %d with only %d mbufs (from %d)\n",
1044                      rx_ring->qid, i, num);
1045         }
1046
1047         if (likely(i != 0)) {
1048                 wmb();
1049                 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1050         }
1051         rx_ring->next_to_use = next_to_use;
1052         return (i);
1053 }
1054
1055 static void
1056 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1057 {
1058         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1059         unsigned int i;
1060
1061         for (i = 0; i < rx_ring->ring_size; i++) {
1062                 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1063
1064                 if (rx_info->mbuf != NULL)
1065                         ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1066         }
1067 }
1068
1069 /**
1070  * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1071  * @adapter: network interface device structure
1072  *
1073  */
1074 static void
1075 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1076 {
1077         struct ena_ring *rx_ring;
1078         int i, rc, bufs_num;
1079
1080         for (i = 0; i < adapter->num_queues; i++) {
1081                 rx_ring = &adapter->rx_ring[i];
1082                 bufs_num = rx_ring->ring_size - 1;
1083                 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1084
1085                 if (unlikely(rc != bufs_num))
1086                         ena_trace(ENA_WARNING, "refilling Queue %d failed. "
1087                             "Allocated %d buffers from: %d\n", i, rc, bufs_num);
1088         }
1089 }
1090
1091 static void
1092 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1093 {
1094         int i;
1095
1096         for (i = 0; i < adapter->num_queues; i++)
1097                 ena_free_rx_bufs(adapter, i);
1098 }
1099
1100 /**
1101  * ena_free_tx_bufs - Free Tx Buffers per Queue
1102  * @adapter: network interface device structure
1103  * @qid: queue index
1104  **/
1105 static void
1106 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1107 {
1108         bool print_once = true;
1109         struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1110
1111         ENA_RING_MTX_LOCK(tx_ring);
1112         for (int i = 0; i < tx_ring->ring_size; i++) {
1113                 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1114
1115                 if (tx_info->mbuf == NULL)
1116                         continue;
1117
1118                 if (print_once) {
1119                         device_printf(adapter->pdev,
1120                             "free uncompleted tx mbuf qid %d idx 0x%x",
1121                             qid, i);
1122                         print_once = false;
1123                 } else {
1124                         ena_trace(ENA_DBG,
1125                             "free uncompleted tx mbuf qid %d idx 0x%x",
1126                              qid, i);
1127                 }
1128
1129                 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
1130                 m_free(tx_info->mbuf);
1131                 tx_info->mbuf = NULL;
1132         }
1133         ENA_RING_MTX_UNLOCK(tx_ring);
1134 }
1135
1136 static void
1137 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1138 {
1139
1140         for (int i = 0; i < adapter->num_queues; i++)
1141                 ena_free_tx_bufs(adapter, i);
1142 }
1143
1144 static void
1145 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1146 {
1147         uint16_t ena_qid;
1148         int i;
1149
1150         for (i = 0; i < adapter->num_queues; i++) {
1151                 ena_qid = ENA_IO_TXQ_IDX(i);
1152                 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1153         }
1154 }
1155
1156 static void
1157 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1158 {
1159         uint16_t ena_qid;
1160         int i;
1161
1162         for (i = 0; i < adapter->num_queues; i++) {
1163                 ena_qid = ENA_IO_RXQ_IDX(i);
1164                 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1165         }
1166 }
1167
/**
 * ena_destroy_all_io_queues - destroy all TX and RX I/O queues
 * @adapter: network interface device structure
 *
 * The TX queues are destroyed first, then the RX queues.
 **/
static void
ena_destroy_all_io_queues(struct ena_adapter *adapter)
{

	ena_destroy_all_tx_queues(adapter);
	ena_destroy_all_rx_queues(adapter);
}
1174
1175 static inline int
1176 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
1177 {
1178         struct ena_adapter *adapter = tx_ring->adapter;
1179         struct ena_tx_buffer *tx_info = NULL;
1180
1181         if (likely(req_id < tx_ring->ring_size)) {
1182                 tx_info = &tx_ring->tx_buffer_info[req_id];
1183                 if (tx_info->mbuf != NULL)
1184                         return (0);
1185         }
1186
1187         if (tx_info->mbuf == NULL)
1188                 device_printf(adapter->pdev,
1189                     "tx_info doesn't have valid mbuf\n");
1190         else
1191                 device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
1192
1193         IFNET_STAT_INC(tx_ring->adapter->ifp, oerrors, 1);
1194 #if 0 /* XXX swildner counters */
1195         counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
1196 #endif
1197
1198         return (EFAULT);
1199 }
1200
1201 static int
1202 ena_create_io_queues(struct ena_adapter *adapter)
1203 {
1204         struct ena_com_dev *ena_dev = adapter->ena_dev;
1205         struct ena_com_create_io_ctx ctx;
1206         struct ena_ring *ring;
1207         uint16_t ena_qid;
1208         uint32_t msix_vector;
1209         int rc, i;
1210
1211         /* Create TX queues */
1212         for (i = 0; i < adapter->num_queues; i++) {
1213                 msix_vector = ENA_IO_IRQ_IDX(i);
1214                 ena_qid = ENA_IO_TXQ_IDX(i);
1215                 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1216                 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1217                 ctx.queue_size = adapter->tx_ring_size;
1218                 ctx.msix_vector = msix_vector;
1219                 ctx.qid = ena_qid;
1220                 rc = ena_com_create_io_queue(ena_dev, &ctx);
1221                 if (rc != 0) {
1222                         device_printf(adapter->pdev,
1223                             "Failed to create io TX queue #%d rc: %d\n", i, rc);
1224                         goto err_tx;
1225                 }
1226                 ring = &adapter->tx_ring[i];
1227                 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1228                     &ring->ena_com_io_sq,
1229                     &ring->ena_com_io_cq);
1230                 if (rc != 0) {
1231                         device_printf(adapter->pdev,
1232                             "Failed to get TX queue handlers. TX queue num"
1233                             " %d rc: %d\n", i, rc);
1234                         ena_com_destroy_io_queue(ena_dev, ena_qid);
1235                         goto err_tx;
1236                 }
1237         }
1238
1239         /* Create RX queues */
1240         for (i = 0; i < adapter->num_queues; i++) {
1241                 msix_vector = ENA_IO_IRQ_IDX(i);
1242                 ena_qid = ENA_IO_RXQ_IDX(i);
1243                 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1244                 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1245                 ctx.queue_size = adapter->rx_ring_size;
1246                 ctx.msix_vector = msix_vector;
1247                 ctx.qid = ena_qid;
1248                 rc = ena_com_create_io_queue(ena_dev, &ctx);
1249                 if (unlikely(rc != 0)) {
1250                         device_printf(adapter->pdev,
1251                             "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1252                         goto err_rx;
1253                 }
1254
1255                 ring = &adapter->rx_ring[i];
1256                 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1257                     &ring->ena_com_io_sq,
1258                     &ring->ena_com_io_cq);
1259                 if (unlikely(rc != 0)) {
1260                         device_printf(adapter->pdev,
1261                             "Failed to get RX queue handlers. RX queue num"
1262                             " %d rc: %d\n", i, rc);
1263                         ena_com_destroy_io_queue(ena_dev, ena_qid);
1264                         goto err_rx;
1265                 }
1266         }
1267
1268         return (0);
1269
1270 err_rx:
1271         while (i--)
1272                 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1273         i = adapter->num_queues;
1274 err_tx:
1275         while (i--)
1276                 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1277
1278         return (ENXIO);
1279 }
1280
1281 /**
1282  * ena_tx_cleanup - clear sent packets and corresponding descriptors
1283  * @tx_ring: ring for which we want to clean packets
1284  *
1285  * Once packets are sent, we ask the device in a loop for no longer used
1286  * descriptors. We find the related mbuf chain in a map (index in an array)
1287  * and free it, then update ring state.
1288  * This is performed in "endless" loop, updating ring pointers every
1289  * TX_COMMIT. The first check of free descriptor is performed before the actual
1290  * loop, then repeated at the loop end.
1291  **/
1292 static int
1293 ena_tx_cleanup(struct ena_ring *tx_ring)
1294 {
1295         struct ena_adapter *adapter;
1296         struct ena_com_io_cq* io_cq;
1297         uint16_t next_to_clean;
1298         uint16_t req_id;
1299         uint16_t ena_qid;
1300         unsigned int total_done = 0;
1301         int rc;
1302         int commit = TX_COMMIT;
1303         int budget = TX_BUDGET;
1304         int work_done;
1305
1306         adapter = tx_ring->que->adapter;
1307         ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1308         io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1309         next_to_clean = tx_ring->next_to_clean;
1310
1311         do {
1312                 struct ena_tx_buffer *tx_info;
1313                 struct mbuf *mbuf;
1314
1315                 rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
1316                 if (unlikely(rc != 0))
1317                         break;
1318
1319                 rc = validate_tx_req_id(tx_ring, req_id);
1320                 if (unlikely(rc != 0))
1321                         break;
1322
1323                 tx_info = &tx_ring->tx_buffer_info[req_id];
1324
1325                 mbuf = tx_info->mbuf;
1326
1327                 tx_info->mbuf = NULL;
1328                 timevalclear(&tx_info->timestamp);
1329
1330                 if (likely(tx_info->num_of_bufs != 0)) {
1331                         /* Map is no longer required */
1332                         bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
1333                 }
1334
1335                 ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed",
1336                     tx_ring->qid, mbuf);
1337
1338                 m_freem(mbuf);
1339
1340                 total_done += tx_info->tx_descs;
1341
1342                 tx_ring->free_tx_ids[next_to_clean] = req_id;
1343                 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1344                     tx_ring->ring_size);
1345
1346                 if (unlikely(--commit == 0)) {
1347                         commit = TX_COMMIT;
1348                         /* update ring state every TX_COMMIT descriptor */
1349                         tx_ring->next_to_clean = next_to_clean;
1350                         ena_com_comp_ack(
1351                             &adapter->ena_dev->io_sq_queues[ena_qid],
1352                             total_done);
1353                         ena_com_update_dev_comp_head(io_cq);
1354                         total_done = 0;
1355                 }
1356         } while (likely(--budget));
1357
1358         work_done = TX_BUDGET - budget;
1359
1360         ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d",
1361         tx_ring->qid, work_done);
1362
1363         /* If there is still something to commit update ring state */
1364         if (likely(commit != TX_COMMIT)) {
1365                 tx_ring->next_to_clean = next_to_clean;
1366                 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
1367                     total_done);
1368                 ena_com_update_dev_comp_head(io_cq);
1369         }
1370
1371         return (work_done);
1372 }
1373
1374 static void
1375 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1376     struct mbuf *mbuf)
1377 {
1378         struct ena_adapter *adapter = rx_ring->adapter;
1379
1380         if (likely(adapter->rss_support)) {
1381                 //mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
1382                 m_sethash(mbuf, ena_rx_ctx->hash);
1383
1384 #if 0 /* XXX rsstype doesn't seem to be needed by the network stack, we will only supply the hash. */
1385                 if (ena_rx_ctx->frag &&
1386                     (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
1387                         M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1388                         return;
1389                 }
1390
1391                 switch (ena_rx_ctx->l3_proto) {
1392                 case ENA_ETH_IO_L3_PROTO_IPV4:
1393                         switch (ena_rx_ctx->l4_proto) {
1394                         case ENA_ETH_IO_L4_PROTO_TCP:
1395                                 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
1396                                 break;
1397                         case ENA_ETH_IO_L4_PROTO_UDP:
1398                                 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
1399                                 break;
1400                         default:
1401                                 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
1402                         }
1403                         break;
1404                 case ENA_ETH_IO_L3_PROTO_IPV6:
1405                         switch (ena_rx_ctx->l4_proto) {
1406                         case ENA_ETH_IO_L4_PROTO_TCP:
1407                                 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
1408                                 break;
1409                         case ENA_ETH_IO_L4_PROTO_UDP:
1410                                 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
1411                                 break;
1412                         default:
1413                                 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
1414                         }
1415                         break;
1416                 case ENA_ETH_IO_L3_PROTO_UNKNOWN:
1417                         M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1418                         break;
1419                 default:
1420                         M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1421                 }
1422 #endif
1423         } else {
1424                 //mbuf->m_pkthdr.flowid = rx_ring->qid;
1425                 //M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1426                 m_sethash(mbuf, rx_ring->qid);
1427         }
1428 }
1429
1430 /**
1431  * ena_rx_mbuf - assemble mbuf from descriptors
1432  * @rx_ring: ring for which we want to clean packets
1433  * @ena_bufs: buffer info
1434  * @ena_rx_ctx: metadata for this packet(s)
1435  * @next_to_clean: ring pointer, will be updated only upon success
1436  *
1437  **/
1438 static struct mbuf*
1439 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
1440     struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
1441 {
1442         struct mbuf *mbuf;
1443         struct ena_rx_buffer *rx_info;
1444         struct ena_adapter *adapter;
1445         unsigned int descs = ena_rx_ctx->descs;
1446         uint16_t ntc, len, req_id, buf = 0;
1447
1448         ntc = *next_to_clean;
1449         adapter = rx_ring->adapter;
1450         rx_info = &rx_ring->rx_buffer_info[ntc];
1451
1452         if (unlikely(rx_info->mbuf == NULL)) {
1453                 device_printf(adapter->pdev, "NULL mbuf in rx_info");
1454                 return (NULL);
1455         }
1456
1457         len = ena_bufs[buf].len;
1458         req_id = ena_bufs[buf].req_id;
1459         rx_info = &rx_ring->rx_buffer_info[req_id];
1460
1461         ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx",
1462             rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
1463
1464         mbuf = rx_info->mbuf;
1465         mbuf->m_flags |= M_PKTHDR;
1466         mbuf->m_pkthdr.len = len;
1467         mbuf->m_len = len;
1468         mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
1469
1470         /* Fill mbuf with hash key and it's interpretation for optimization */
1471         ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
1472
1473         ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d",
1474             mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
1475
1476         /* DMA address is not needed anymore, unmap it */
1477         bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1478
1479         rx_info->mbuf = NULL;
1480         rx_ring->free_rx_ids[ntc] = req_id;
1481         ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1482
1483         /*
1484          * While we have more than 1 descriptors for one rcvd packet, append
1485          * other mbufs to the main one
1486          */
1487         while (--descs) {
1488                 ++buf;
1489                 len = ena_bufs[buf].len;
1490                 req_id = ena_bufs[buf].req_id;
1491                 rx_info = &rx_ring->rx_buffer_info[req_id];
1492
1493                 if (unlikely(rx_info->mbuf == NULL)) {
1494                         device_printf(adapter->pdev, "NULL mbuf in rx_info");
1495                         /*
1496                          * If one of the required mbufs was not allocated yet,
1497                          * we can break there.
1498                          * All earlier used descriptors will be reallocated
1499                          * later and not used mbufs can be reused.
1500                          * The next_to_clean pointer will not be updated in case
1501                          * of an error, so caller should advance it manually
1502                          * in error handling routine to keep it up to date
1503                          * with hw ring.
1504                          */
1505                         m_freem(mbuf);
1506                         return (NULL);
1507                 }
1508
1509                 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
1510                         IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
1511 #if 0 /* XXX swildner counters */
1512                         counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1513 #endif
1514                         ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p",
1515                             mbuf);
1516                 }
1517
1518                 ena_trace(ENA_DBG | ENA_RXPTH,
1519                     "rx mbuf updated. len %d", mbuf->m_pkthdr.len);
1520
1521                 /* Free already appended mbuf, it won't be useful anymore */
1522                 bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1523                 m_freem(rx_info->mbuf);
1524                 rx_info->mbuf = NULL;
1525
1526                 rx_ring->free_rx_ids[ntc] = req_id;
1527                 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1528         }
1529
1530         *next_to_clean = ntc;
1531
1532         return (mbuf);
1533 }
1534
1535 /**
1536  * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
1537  **/
1538 static inline void
1539 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1540     struct mbuf *mbuf)
1541 {
1542
1543         /* if IP and error */
1544         if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1545             ena_rx_ctx->l3_csum_err)) {
1546                 /* ipv4 checksum error */
1547                 mbuf->m_pkthdr.csum_flags = 0;
1548                 IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
1549 #if 0 /* XXX swildner counters */
1550                 counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1551 #endif
1552                 ena_trace(ENA_DBG, "RX IPv4 header checksum error");
1553                 return;
1554         }
1555
1556         /* if TCP/UDP */
1557         if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1558             (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
1559                 if (ena_rx_ctx->l4_csum_err) {
1560                         /* TCP/UDP checksum error */
1561                         mbuf->m_pkthdr.csum_flags = 0;
1562                         IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
1563 #if 0 /* XXX swildner counters */
1564                         counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1565 #endif
1566                         ena_trace(ENA_DBG, "RX L4 checksum error");
1567                 } else {
1568                         mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1569                         mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1570                 }
1571         }
1572 }
1573
1574 static void
1575 ena_deferred_rx_cleanup(void *arg, int pending)
1576 {
1577         struct ena_ring *rx_ring = arg;
1578         int budget = CLEAN_BUDGET;
1579
1580         ENA_RING_MTX_LOCK(rx_ring);
1581         /*
1582          * If deferred task was executed, perform cleanup of all awaiting
1583          * descs (or until given budget is depleted to avoid infinite loop).
1584          */
1585         while (likely(budget--)) {
1586                 if (ena_rx_cleanup(rx_ring) == 0)
1587                         break;
1588         }
1589         ENA_RING_MTX_UNLOCK(rx_ring);
1590 }
1591
1592 /**
1593  * ena_rx_cleanup - handle rx irq
1594  * @rx_ring: ring for which irq is being handled
1595  **/
1596 static int
1597 ena_rx_cleanup(struct ena_ring *rx_ring)
1598 {
1599         struct ena_adapter *adapter;
1600         struct mbuf *mbuf;
1601         struct ena_com_rx_ctx ena_rx_ctx;
1602         struct ena_com_io_cq* io_cq;
1603         struct ena_com_io_sq* io_sq;
1604         if_t ifp;
1605         uint16_t ena_qid;
1606         uint16_t next_to_clean;
1607         uint32_t refill_required;
1608         uint32_t refill_threshold;
1609         uint32_t do_if_input = 0;
1610         unsigned int qid;
1611         int rc, i;
1612         int budget = RX_BUDGET;
1613
1614         adapter = rx_ring->que->adapter;
1615         ifp = adapter->ifp;
1616         qid = rx_ring->que->id;
1617         ena_qid = ENA_IO_RXQ_IDX(qid);
1618         io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1619         io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1620         next_to_clean = rx_ring->next_to_clean;
1621
1622         ena_trace(ENA_DBG, "rx: qid %d", qid);
1623
1624         do {
1625                 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1626                 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
1627                 ena_rx_ctx.descs = 0;
1628                 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
1629
1630                 if (unlikely(rc != 0))
1631                         goto error;
1632
1633                 if (unlikely(ena_rx_ctx.descs == 0))
1634                         break;
1635
1636                 ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
1637                     "descs #: %d l3 proto %d l4 proto %d hash: %x",
1638                     rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1639                     ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1640
1641                 /* Receive mbuf from the ring */
1642                 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
1643                     &ena_rx_ctx, &next_to_clean);
1644
1645                 /* Exit if we failed to retrieve a buffer */
1646                 if (unlikely(mbuf == NULL)) {
1647                         for (i = 0; i < ena_rx_ctx.descs; ++i) {
1648                                 rx_ring->free_rx_ids[next_to_clean] =
1649                                     rx_ring->ena_bufs[i].req_id;
1650                                 next_to_clean =
1651                                     ENA_RX_RING_IDX_NEXT(next_to_clean,
1652                                     rx_ring->ring_size);
1653
1654                         }
1655                         break;
1656                 }
1657
1658                 /*
1659                  * XXX Removed IFCAP_RXCSUM_IPV6 check because DragonFly
1660                  *     does not seem to support it
1661                  */
1662                 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1663                         ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
1664                 }
1665
1666 #if 0 /* XXX swildner counters */
1667                 counter_enter();
1668                 counter_u64_add_protected(rx_ring->rx_stats.bytes,
1669                     mbuf->m_pkthdr.len);
1670                 counter_u64_add_protected(adapter->hw_stats.rx_bytes,
1671                     mbuf->m_pkthdr.len);
1672                 counter_exit();
1673 #endif
1674                 /*
1675                  * LRO is only for IP/TCP packets and TCP checksum of the packet
1676                  * should be computed by hardware.
1677                  */
1678                 do_if_input = 1;
1679 #if 0 /* XXX LRO */
1680                 if (((ifp->if_capenable & IFCAP_LRO) != 0)  &&
1681                     ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
1682                     (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
1683                         /*
1684                          * Send to the stack if:
1685                          *  - LRO not enabled, or
1686                          *  - no LRO resources, or
1687                          *  - lro enqueue fails
1688                          */
1689                         if ((rx_ring->lro.lro_cnt != 0) &&
1690                             (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
1691                                         do_if_input = 0;
1692                 }
1693 #endif
1694                 if (do_if_input != 0) {
1695                         ena_trace(ENA_DBG | ENA_RXPTH,
1696                             "calling if_input() with mbuf %p", mbuf);
1697                         ENA_RING_MTX_UNLOCK(rx_ring);
1698                         (*ifp->if_input)(ifp, mbuf, NULL, -1);
1699                         ENA_RING_MTX_LOCK(rx_ring);
1700                 }
1701
1702                 IFNET_STAT_INC(ifp, ipackets, 1);
1703 #if 0 /* XXX swildner counters */
1704                 counter_enter();
1705                 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
1706                 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
1707                 counter_exit();
1708 #endif
1709         } while (--budget);
1710
1711         rx_ring->next_to_clean = next_to_clean;
1712
1713         refill_required = ena_com_free_desc(io_sq);
1714         refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
1715
1716         if (refill_required > refill_threshold) {
1717                 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1718                 ena_refill_rx_bufs(rx_ring, refill_required);
1719         }
1720
1721 #if 0 /* XXX LRO */
1722         tcp_lro_flush_all(&rx_ring->lro);
1723 #endif
1724
1725         return (RX_BUDGET - budget);
1726
1727 error:
1728         IFNET_STAT_INC(rx_ring->adapter->ifp, ierrors, 1);
1729 #if 0 /* XXX swildner counters */
1730         counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
1731 #endif
1732         return (RX_BUDGET - budget);
1733 }
1734
1735 /*********************************************************************
1736  *
1737  *  MSIX & Interrupt Service routine
1738  *
1739  **********************************************************************/
1740
1741 /**
1742  * ena_handle_msix - MSIX Interrupt Handler for admin/async queue
1743  * @arg: interrupt number
1744  **/
1745 static void
1746 ena_intr_msix_mgmnt(void *arg)
1747 {
1748         struct ena_adapter *adapter = (struct ena_adapter *)arg;
1749
1750         ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1751         if (likely(adapter->running))
1752                 ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1753 }
1754
1755 /**
1756  * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1757  * @arg: interrupt number
1758  **/
1759 static void
1760 ena_handle_msix(void *arg)
1761 {
1762         struct ena_que  *que = arg;
1763         struct ena_adapter *adapter = que->adapter;
1764         if_t ifp = adapter->ifp;
1765         struct ena_ring *tx_ring;
1766         struct ena_ring *rx_ring;
1767         struct ena_com_io_cq* io_cq;
1768         struct ena_eth_io_intr_reg intr_reg;
1769         int qid, ena_qid;
1770         int txc, rxc, i;
1771
1772         if (unlikely((ifp->if_flags & IFF_RUNNING) == 0))
1773                 return;
1774
1775         ena_trace(ENA_DBG, "MSI-X TX/RX routine");
1776
1777         tx_ring = que->tx_ring;
1778         rx_ring = que->rx_ring;
1779         qid = que->id;
1780         ena_qid = ENA_IO_TXQ_IDX(qid);
1781         io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1782
1783         for (i = 0; i < CLEAN_BUDGET; ++i) {
1784                 /*
1785                  * If lock cannot be acquired, then deferred cleanup task was
1786                  * being executed and rx ring is being cleaned up in
1787                  * another thread.
1788                  */
1789                 if (likely(ENA_RING_MTX_TRYLOCK(rx_ring) != 0)) {
1790                         rxc = ena_rx_cleanup(rx_ring);
1791                         ENA_RING_MTX_UNLOCK(rx_ring);
1792                 } else {
1793                         rxc = 0;
1794                 }
1795
1796                 /* Protection from calling ena_tx_cleanup from ena_start_xmit */
1797                 ENA_RING_MTX_LOCK(tx_ring);
1798                 txc = ena_tx_cleanup(tx_ring);
1799                 ENA_RING_MTX_UNLOCK(tx_ring);
1800
1801                 if (unlikely((ifp->if_flags & IFF_RUNNING) == 0))
1802                         return;
1803
1804                 if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
1805                        break;
1806         }
1807
1808         /* Signal that work is done and unmask interrupt */
1809         ena_com_update_intr_reg(&intr_reg,
1810             RX_IRQ_INTERVAL,
1811             TX_IRQ_INTERVAL,
1812             true);
1813         ena_com_unmask_intr(io_cq, &intr_reg);
1814 }
1815
1816 static int
1817 ena_enable_msix(struct ena_adapter *adapter)
1818 {
1819         device_t dev = adapter->pdev;
1820         int msix_vecs;
1821         int error, i, rc = 0;
1822
1823         /* Reserved the max msix vectors we might need */
1824         msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_queues);
1825
1826         adapter->msix_entries = kmalloc(msix_vecs * sizeof(struct msix_entry),
1827             M_DEVBUF, M_WAITOK | M_ZERO);
1828
1829         ena_trace(ENA_DBG, "trying to enable MSI-X, vectors: %d", msix_vecs);
1830
1831         for (i = 0; i < msix_vecs; i++) {
1832                 adapter->msix_entries[i].entry = i;
1833                 /* Vectors must start from 1 */
1834                 adapter->msix_entries[i].vector = i + 1;
1835         }
1836
1837         error = pci_setup_msix(dev);
1838         if (error) {
1839                 device_printf(dev, "pci_setup_msix() failed\n");
1840                 goto err_msix_free;
1841         }
1842
1843         adapter->msix_vecs = msix_vecs;
1844         adapter->msix_enabled = true;
1845
1846         return (0);
1847
1848 err_msix_free:
1849         kfree(adapter->msix_entries, M_DEVBUF);
1850         adapter->msix_entries = NULL;
1851
1852         return (rc);
1853 }
1854
1855 static void
1856 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1857 {
1858
1859         ksnprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1860             ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1861             device_get_nameunit(adapter->pdev));
1862         /*
1863          * Handler is NULL on purpose, it will be set
1864          * when mgmnt interrupt is acquired
1865          */
1866         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1867         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1868         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1869             adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1870 }
1871
1872 static void
1873 ena_setup_io_intr(struct ena_adapter *adapter)
1874 {
1875         static int last_bind_cpu = -1;
1876         int irq_idx;
1877
1878         for (int i = 0; i < adapter->num_queues; i++) {
1879                 irq_idx = ENA_IO_IRQ_IDX(i);
1880
1881                 ksnprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1882                     "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1883                 adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1884                 adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1885                 adapter->irq_tbl[irq_idx].vector =
1886                     adapter->msix_entries[irq_idx].vector;
1887                 ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1888                     adapter->msix_entries[irq_idx].vector);
1889 #ifdef  RSS
1890                 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1891                     rss_getcpu(i % rss_getnumbuckets());
1892 #else
1893                 /*
1894                  * We still want to bind rings to the corresponding cpu
1895                  * using something similar to the RSS round-robin technique.
1896                  *
1897                  * XXX It seems that this can be removed since DragonFly has
1898                  *     native support for RSS. DragonFly also does not have
1899                  *     support for CPU_FIRST or CPU_NEXT.
1900                  */
1901
1902                 if (last_bind_cpu < 0)
1903                         last_bind_cpu = (last_bind_cpu + 1) % ncpus;
1904                 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1905                     last_bind_cpu;
1906                 last_bind_cpu = (last_bind_cpu + 1) % ncpus;
1907 #endif
1908         }
1909 }
1910
1911 static int
1912 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1913 {
1914         struct ena_irq *irq;
1915         unsigned long flags;
1916         int error, rc, rcc;
1917
1918         flags = RF_ACTIVE | RF_SHAREABLE;
1919
1920         irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1921
1922         error = pci_alloc_msix_vector(adapter->pdev, 0, &irq->vector, 0);
1923         if (error) {
1924                 device_printf(adapter->pdev, "Could not initialize MGMNT MSI-X Vector on cpu0\n");
1925                 return (ENXIO);
1926         }
1927
1928         irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1929             &irq->vector, flags);
1930
1931         if (unlikely(irq->res == NULL)) {
1932                 device_printf(adapter->pdev, "could not allocate "
1933                     "irq vector: %d\n", irq->vector);
1934                 pci_release_msix_vector(adapter->pdev, irq->vector);
1935                 return (ENXIO);
1936         }
1937
1938         rc = bus_activate_resource(adapter->pdev, SYS_RES_IRQ,
1939             irq->vector, irq->res);
1940         if (unlikely(rc != 0)) {
1941                 device_printf(adapter->pdev, "could not activate "
1942                     "irq vector: %d\n", irq->vector);
1943                 goto err_res_free;
1944         }
1945
1946         rc = bus_setup_intr(adapter->pdev, irq->res,
1947             INTR_MPSAFE, ena_intr_msix_mgmnt,
1948             irq->data, &irq->cookie, NULL);
1949         if (unlikely(rc != 0)) {
1950                 device_printf(adapter->pdev, "failed to register "
1951                     "interrupt handler for irq %ju: %d\n",
1952                     rman_get_start(irq->res), rc);
1953                 goto err_res_free;
1954         }
1955         irq->requested = true;
1956
1957         return (rc);
1958
1959 err_res_free:
1960         ena_trace(ENA_INFO | ENA_ADMQ, "releasing resource for irq %d\n",
1961             irq->vector);
1962         rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1963             irq->vector, irq->res);
1964         pci_release_msix_vector(adapter->pdev, irq->vector);
1965         if (unlikely(rcc != 0))
1966                 device_printf(adapter->pdev, "dev has no parent while "
1967                     "releasing res for irq: %d\n", irq->vector);
1968         irq->res = NULL;
1969
1970         return (rc);
1971 }
1972
1973 static int
1974 ena_request_io_irq(struct ena_adapter *adapter)
1975 {
1976         struct ena_irq *irq;
1977         unsigned long flags = 0;
1978         int rc = 0, i, rcc, error;
1979
1980         if (unlikely(adapter->msix_enabled == 0)) {
1981                 device_printf(adapter->pdev,
1982                     "failed to request I/O IRQ: MSI-X is not enabled\n");
1983                 return (EINVAL);
1984         } else {
1985                 flags = RF_ACTIVE | RF_SHAREABLE;
1986         }
1987
1988         for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1989                 irq = &adapter->irq_tbl[i];
1990
1991                 if (unlikely(irq->requested))
1992                         continue;
1993
1994                 error = pci_alloc_msix_vector(adapter->pdev, i, &irq->vector, irq->cpu);
1995                 if (error) {
1996                         device_printf(adapter->pdev, "Unable to allocated MSI-X %d on cpu%d\n", i, irq->cpu);
1997                         goto err;
1998                 }
1999
2000                 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
2001                     &irq->vector, flags);
2002                 if (unlikely(irq->res == NULL)) {
2003                         device_printf(adapter->pdev, "could not allocate "
2004                             "irq vector: %d\n", irq->vector);
2005                         goto err;
2006                 }
2007
2008
2009                 /*
2010                  * TODO: Might need to setup desc and use irq->name as the
2011                  *       value
2012                  */
2013                 rc = bus_setup_intr(adapter->pdev, irq->res,
2014                     INTR_MPSAFE,
2015                     irq->handler, irq->data, &irq->cookie, NULL);
2016                  if (unlikely(rc != 0)) {
2017                         device_printf(adapter->pdev, "failed to register "
2018                             "interrupt handler for irq %ju: %d\n",
2019                             rman_get_start(irq->res), rc);
2020                         goto err;
2021                 }
2022                 irq->requested = true;
2023
2024 #ifdef  RSS
2025                 ena_trace(ENA_INFO, "queue %d - RSS bucket %d\n",
2026                     i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
2027 #else
2028                 ena_trace(ENA_INFO, "queue %d - cpu %d\n",
2029                     i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
2030 #endif
2031         }
2032
2033         return (rc);
2034
2035 err:
2036
2037         for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
2038                 irq = &adapter->irq_tbl[i];
2039                 rcc = 0;
2040
2041                 /* Once we entered err: section and irq->requested is true we
2042                    free both intr and resources */
2043                 if (irq->requested)
2044                         rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
2045                 if (unlikely(rcc != 0))
2046                         device_printf(adapter->pdev, "could not release"
2047                             " irq: %d, error: %d\n", irq->vector, rcc);
2048
2049                 /* If we entred err: section without irq->requested set we know
2050                    it was bus_alloc_resource_any() that needs cleanup, provided
2051                    res is not NULL. In case res is NULL no work in needed in
2052                    this iteration */
2053                 rcc = 0;
2054                 if (irq->res != NULL) {
2055                         rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2056                             irq->vector, irq->res);
2057                         pci_release_msix_vector(adapter->pdev, irq->vector);
2058                 }
2059                 if (unlikely(rcc != 0))
2060                         device_printf(adapter->pdev, "dev has no parent while "
2061                             "releasing res for irq: %d\n", irq->vector);
2062                 irq->requested = false;
2063                 irq->res = NULL;
2064         }
2065
2066         return (rc);
2067 }
2068
2069 static void
2070 ena_free_mgmnt_irq(struct ena_adapter *adapter)
2071 {
2072         struct ena_irq *irq;
2073         int rc;
2074
2075         irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2076         if (irq->requested) {
2077                 ena_trace(ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
2078                     irq->vector);
2079                 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
2080                 if (unlikely(rc != 0))
2081                         device_printf(adapter->pdev, "failed to tear "
2082                             "down irq: %d\n", irq->vector);
2083                 irq->requested = 0;
2084         }
2085
2086         if (irq->res != NULL) {
2087                 ena_trace(ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
2088                     irq->vector);
2089                 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2090                     irq->vector, irq->res);
2091                 pci_release_msix_vector(adapter->pdev, irq->vector);
2092                 irq->res = NULL;
2093                 if (unlikely(rc != 0))
2094                         device_printf(adapter->pdev, "dev has no parent while "
2095                             "releasing res for irq: %d\n", irq->vector);
2096         }
2097 }
2098
2099 static void
2100 ena_free_io_irq(struct ena_adapter *adapter)
2101 {
2102         struct ena_irq *irq;
2103         int rc;
2104
2105         for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2106                 irq = &adapter->irq_tbl[i];
2107                 if (irq->requested) {
2108                         ena_trace(ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
2109                             irq->vector);
2110                         rc = bus_teardown_intr(adapter->pdev, irq->res,
2111                             irq->cookie);
2112                         if (unlikely(rc != 0)) {
2113                                 device_printf(adapter->pdev, "failed to tear "
2114                                     "down irq: %d\n", irq->vector);
2115                         }
2116                         irq->requested = 0;
2117                 }
2118
2119                 if (irq->res != NULL) {
2120                         ena_trace(ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
2121                             irq->vector);
2122                         rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2123                             irq->vector, irq->res);
2124                         pci_release_msix_vector(adapter->pdev, irq->vector);
2125                         irq->res = NULL;
2126                         if (unlikely(rc != 0)) {
2127                                 device_printf(adapter->pdev, "dev has no parent"
2128                                     " while releasing res for irq: %d\n",
2129                                     irq->vector);
2130                         }
2131                 }
2132         }
2133 }
2134
/*
 * Release every interrupt owned by the adapter: the I/O queue interrupts
 * first, then the management interrupt, and finally the MSI-X state.
 */
static void
ena_free_irqs(struct ena_adapter *adapter)
{
        /* Per-queue Tx/Rx interrupts. */
        ena_free_io_irq(adapter);
        /* Admin/AENQ interrupt. */
        ena_free_mgmnt_irq(adapter);
        /* MSI-X bookkeeping and entry table. */
        ena_disable_msix(adapter);
}
2143
2144 static void
2145 ena_disable_msix(struct ena_adapter *adapter)
2146 {
2147
2148         pci_release_msi(adapter->pdev);
2149
2150         adapter->msix_vecs = 0;
2151         kfree(adapter->msix_entries, M_DEVBUF);
2152         adapter->msix_entries = NULL;
2153 }
2154
2155 static void
2156 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2157 {
2158         struct ena_com_io_cq* io_cq;
2159         struct ena_eth_io_intr_reg intr_reg;
2160         uint16_t ena_qid;
2161         int i;
2162
2163         /* Unmask interrupts for all queues */
2164         for (i = 0; i < adapter->num_queues; i++) {
2165                 ena_qid = ENA_IO_TXQ_IDX(i);
2166                 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2167                 ena_com_update_intr_reg(&intr_reg, 0, 0, true);
2168                 ena_com_unmask_intr(io_cq, &intr_reg);
2169         }
2170 }
2171
2172 /* Configure the Rx forwarding */
2173 static int
2174 ena_rss_configure(struct ena_adapter *adapter)
2175 {
2176         struct ena_com_dev *ena_dev = adapter->ena_dev;
2177         int rc;
2178
2179         /* Set indirect table */
2180         rc = ena_com_indirect_table_set(ena_dev);
2181         if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2182                 return (rc);
2183
2184         /* Configure hash function (if supported) */
2185         rc = ena_com_set_hash_function(ena_dev);
2186         if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2187                 return (rc);
2188
2189         /* Configure hash inputs (if supported) */
2190         rc = ena_com_set_hash_ctrl(ena_dev);
2191         if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2192                 return (rc);
2193
2194         return (0);
2195 }
2196
2197 static int
2198 ena_up_complete(struct ena_adapter *adapter)
2199 {
2200         int rc;
2201
2202         if (likely(adapter->rss_support)) {
2203                 rc = ena_rss_configure(adapter);
2204                 if (rc != 0)
2205                         return (rc);
2206         }
2207
2208         rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
2209         if (unlikely(rc != 0))
2210                 return (rc);
2211
2212         ena_refill_all_rx_bufs(adapter);
2213 #if 0 /* XXX swildner counters */
2214         ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
2215             sizeof(adapter->hw_stats));
2216 #endif
2217
2218         return (0);
2219 }
2220
2221 static int
2222 ena_up(struct ena_adapter *adapter)
2223 {
2224         int rc = 0;
2225
2226         if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2227                 device_printf(adapter->pdev, "device is not attached!\n");
2228                 return (ENXIO);
2229         }
2230
2231         if (unlikely(!adapter->running)) {
2232                 device_printf(adapter->pdev, "device is not running!\n");
2233                 return (ENXIO);
2234         }
2235
2236         if (!adapter->up) {
2237                 device_printf(adapter->pdev, "device is going UP\n");
2238
2239                 /* setup interrupts for IO queues */
2240                 ena_setup_io_intr(adapter);
2241                 rc = ena_request_io_irq(adapter);
2242                 if (unlikely(rc != 0)) {
2243                         ena_trace(ENA_ALERT, "err_req_irq");
2244                         goto err_req_irq;
2245                 }
2246
2247                 /* allocate transmit descriptors */
2248                 rc = ena_setup_all_tx_resources(adapter);
2249                 if (unlikely(rc != 0)) {
2250                         ena_trace(ENA_ALERT, "err_setup_tx");
2251                         goto err_setup_tx;
2252                 }
2253
2254                 /* allocate receive descriptors */
2255                 rc = ena_setup_all_rx_resources(adapter);
2256                 if (unlikely(rc != 0)) {
2257                         ena_trace(ENA_ALERT, "err_setup_rx");
2258                         goto err_setup_rx;
2259                 }
2260
2261                 /* create IO queues for Rx & Tx */
2262                 rc = ena_create_io_queues(adapter);
2263                 if (unlikely(rc != 0)) {
2264                         ena_trace(ENA_ALERT,
2265                             "create IO queues failed");
2266                         goto err_io_que;
2267                 }
2268
2269                 if (unlikely(adapter->link_status)) {
2270                         adapter->ifp->if_link_state = LINK_STATE_UP;
2271                         if_link_state_change(adapter->ifp);
2272                 }
2273
2274                 rc = ena_up_complete(adapter);
2275                 if (unlikely(rc != 0))
2276                         goto err_up_complete;
2277
2278 #if 0 /* XXX swildner counters */
2279                 counter_u64_add(adapter->dev_stats.interface_up, 1);
2280 #endif
2281
2282                 ena_update_hwassist(adapter);
2283
2284                 adapter->ifp->if_flags |= IFF_RUNNING;
2285                 ifq_clr_oactive(&adapter->ifp->if_snd);
2286
2287                 callout_reset(&adapter->timer_service, hz,
2288                     ena_timer_service, (void *)adapter);
2289
2290                 adapter->up = true;
2291
2292                 ena_unmask_all_io_irqs(adapter);
2293         }
2294
2295         return (0);
2296
2297 err_up_complete:
2298         ena_destroy_all_io_queues(adapter);
2299 err_io_que:
2300         ena_free_all_rx_resources(adapter);
2301 err_setup_rx:
2302         ena_free_all_tx_resources(adapter);
2303 err_setup_tx:
2304         ena_free_io_irq(adapter);
2305 err_req_irq:
2306         return (rc);
2307 }
2308
#if 0 /* XXX swildner counters */
/*
 * Return the value of interface counter 'cnt'.  Packet, byte and Rx drop
 * counters come from the adapter's hw_stats; every other counter is
 * answered by if_get_counter_default().  Currently compiled out.
 */
static uint64_t
ena_get_counter(if_t ifp, ift_counter cnt)
{
        struct ena_adapter *adapter = ifp->if_softc;
        struct ena_hw_stats *hw = &adapter->hw_stats;

        if (cnt == IFCOUNTER_IPACKETS)
                return (counter_u64_fetch(hw->rx_packets));
        if (cnt == IFCOUNTER_OPACKETS)
                return (counter_u64_fetch(hw->tx_packets));
        if (cnt == IFCOUNTER_IBYTES)
                return (counter_u64_fetch(hw->rx_bytes));
        if (cnt == IFCOUNTER_OBYTES)
                return (counter_u64_fetch(hw->tx_bytes));
        if (cnt == IFCOUNTER_IQDROPS)
                return (counter_u64_fetch(hw->rx_drops));

        return (if_get_counter_default(ifp, cnt));
}
#endif
2335
2336 static int
2337 ena_media_change(if_t ifp)
2338 {
2339         /* Media Change is not supported by firmware */
2340         return (0);
2341 }
2342
2343 static void
2344 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2345 {
2346         struct ena_adapter *adapter = ifp->if_softc;
2347         ena_trace(ENA_DBG, "enter");
2348
2349         lockmgr(&adapter->global_lock, LK_EXCLUSIVE);
2350
2351         ifmr->ifm_status = IFM_AVALID;
2352         ifmr->ifm_active = IFM_ETHER;
2353
2354         if (!adapter->link_status) {
2355                 lockmgr(&adapter->global_lock, LK_RELEASE);
2356                 ena_trace(ENA_INFO, "link_status = false");
2357                 return;
2358         }
2359
2360         ifmr->ifm_status |= IFM_ACTIVE;
2361         ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
2362
2363         lockmgr(&adapter->global_lock, LK_RELEASE);
2364 }
2365
2366 static void
2367 ena_init(void *arg)
2368 {
2369         struct ena_adapter *adapter = (struct ena_adapter *)arg;
2370
2371         if (!adapter->up) {
2372                 lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
2373                 ena_up(adapter);
2374                 lockmgr(&adapter->ioctl_lock, LK_RELEASE);
2375         }
2376 }
2377
2378 static int
2379 ena_ioctl(if_t ifp, u_long command, caddr_t data, struct ucred *cred)
2380 {
2381         struct ena_adapter *adapter;
2382         struct ifreq *ifr;
2383         int rc;
2384
2385         adapter = ifp->if_softc;
2386         ifr = (struct ifreq *)data;
2387
2388         /*
2389          * Acquiring lock to prevent from running up and down routines parallel.
2390          */
2391         rc = 0;
2392         switch (command) {
2393         case SIOCSIFMTU:
2394                 lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
2395                 ena_down(adapter);
2396
2397                 ena_change_mtu(ifp, ifr->ifr_mtu);
2398
2399                 rc = ena_up(adapter);
2400                 lockmgr(&adapter->ioctl_lock, LK_RELEASE);
2401                 break;
2402
2403         case SIOCSIFFLAGS:
2404                 if ((ifp->if_flags & IFF_UP) != 0) {
2405                         if ((ifp->if_flags & IFF_RUNNING) != 0) {
2406                                 if ((ifp->if_flags & (IFF_PROMISC |
2407                                     IFF_ALLMULTI)) != 0) {
2408                                         device_printf(adapter->pdev,
2409                                             "ioctl promisc/allmulti\n");
2410                                 }
2411                         } else {
2412                                 lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
2413                                 rc = ena_up(adapter);
2414                                 lockmgr(&adapter->ioctl_lock, LK_RELEASE);
2415                         }
2416                 } else {
2417                         if ((ifp->if_flags & IFF_RUNNING) != 0) {
2418                                 lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
2419                                 ena_down(adapter);
2420                                 lockmgr(&adapter->ioctl_lock, LK_RELEASE);
2421                         }
2422                 }
2423                 break;
2424
2425         case SIOCADDMULTI:
2426         case SIOCDELMULTI:
2427                 break;
2428
2429         case SIOCSIFMEDIA:
2430         case SIOCGIFMEDIA:
2431                 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2432                 break;
2433
2434         case SIOCSIFCAP:
2435                 {
2436                         int reinit = 0;
2437
2438                         if (ifr->ifr_reqcap != ifp->if_capenable) {
2439                                 ifp->if_capenable = ifr->ifr_reqcap;
2440                                 reinit = 1;
2441                         }
2442
2443                         if ((reinit != 0) &&
2444                             ((ifp->if_flags & IFF_RUNNING) != 0)) {
2445                                 lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
2446                                 ena_down(adapter);
2447                                 rc = ena_up(adapter);
2448                                 lockmgr(&adapter->ioctl_lock, LK_RELEASE);
2449                         }
2450                 }
2451
2452                 break;
2453         default:
2454                 rc = ether_ioctl(ifp, command, data);
2455                 break;
2456         }
2457
2458         return (rc);
2459 }
2460
2461 static int
2462 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2463 {
2464         int caps = 0;
2465
2466         if ((feat->offload.tx &
2467             (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2468             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2469                 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2470                 caps |= IFCAP_TXCSUM;
2471
2472         if ((feat->offload.tx &
2473             (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2474             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2475                 caps |= IFCAP_TXCSUM;
2476
2477         if ((feat->offload.tx &
2478             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2479                 caps |= IFCAP_TSO4;
2480
2481         if ((feat->offload.tx &
2482             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2483                 caps |= IFCAP_TSO6;
2484
2485         if ((feat->offload.rx_supported &
2486             (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2487             ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2488                 caps |= IFCAP_RXCSUM;
2489
2490 #if 0
2491         if ((feat->offload.rx_supported &
2492             ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2493                 caps |= IFCAP_RXCSUM_IPV6;
2494 #endif
2495 #if 0 /* XXX LRO */
2496         caps |= IFCAP_LRO;
2497 #endif
2498         caps |= IFCAP_JUMBO_MTU;
2499
2500         return (caps);
2501 }
2502
2503 static void
2504 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2505 {
2506
2507         host_info->supported_network_features[0] =
2508             (uint32_t)ifp->if_capabilities;
2509 }
2510
2511 static void
2512 ena_update_hwassist(struct ena_adapter *adapter)
2513 {
2514         if_t ifp = adapter->ifp;
2515         uint32_t feat = adapter->tx_offload_cap;
2516         int cap = ifp->if_capenable;
2517         int flags = 0;
2518
2519         ifp->if_hwassist = 0;
2520
2521         if ((cap & IFCAP_TXCSUM) != 0) {
2522                 if ((feat &
2523                     ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2524                         flags |= CSUM_IP;
2525                 if ((feat &
2526                     (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2527                     ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2528                         flags |= CSUM_UDP | CSUM_TCP;
2529         }
2530
2531 #if 0
2532         if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2533                 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2534 #endif
2535
2536         if ((cap & IFCAP_TSO4) != 0 || (cap & IFCAP_TSO6) != 0)
2537                 flags |= CSUM_TSO;
2538
2539         ifp->if_hwassist |= flags;
2540 }
2541
2542 static int
2543 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2544     struct ena_com_dev_get_features_ctx *feat)
2545 {
2546         if_t ifp;
2547         int caps = 0;
2548
2549         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2550         if (unlikely(ifp == NULL)) {
2551                 ena_trace(ENA_ALERT, "can not allocate ifnet structure\n");
2552                 return (ENXIO);
2553         }
2554         if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2555         ifp->if_softc = adapter;
2556
2557         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2558
2559         ifp->if_init = ena_init;
2560         ifp->if_start = ena_start_xmit;
2561         ifp->if_ioctl = ena_ioctl;
2562 #if 0 /* XXX swildner counter */
2563         if_setgetcounterfn(ifp, ena_get_counter);
2564 #endif
2565
2566         ifq_set_maxlen(&ifp->if_snd, adapter->tx_ring_size);
2567         ifq_set_ready(&ifp->if_snd);
2568         ifp->if_mtu = ETHERMTU;
2569         ifp->if_baudrate = 0;
2570         /* Zeroize capabilities... */
2571         ifp->if_capabilities = 0;
2572         ifp->if_capenable = 0;
2573         /* check hardware support */
2574         caps = ena_get_dev_offloads(feat);
2575         /* ... and set them */
2576         //if_setcapabilitiesbit(ifp, caps, 0);
2577         ((struct ifnet *)ifp)->if_capabilities |= caps;
2578         ((struct ifnet *)ifp)->if_capabilities &= ~0;
2579
2580         /* TSO parameters */
2581         //ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2582         //    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2583         ifp->if_tsolen = adapter->max_tx_sgl_size - 1;
2584         //ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2585
2586         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
2587         ifp->if_capenable= ifp->if_capabilities;
2588
2589         /*
2590          * Specify the media types supported by this adapter and register
2591          * callbacks to update media and link information
2592          */
2593         ifmedia_init(&adapter->media, IFM_IMASK,
2594             ena_media_change, ena_media_status);
2595         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2596         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2597
2598         ether_ifattach(ifp, adapter->mac_addr, NULL);
2599
2600         return (0);
2601 }
2602
2603 static void
2604 ena_down(struct ena_adapter *adapter)
2605 {
2606         int rc;
2607
2608         if (adapter->up) {
2609                 device_printf(adapter->pdev, "device is going DOWN\n");
2610
2611                 callout_drain(&adapter->timer_service);
2612
2613                 adapter->up = false;
2614                 ifq_set_oactive(&adapter->ifp->if_snd);
2615                 adapter->ifp->if_flags &= ~IFF_RUNNING;
2616
2617                 ena_free_io_irq(adapter);
2618
2619                 if (adapter->trigger_reset) {
2620                         rc = ena_com_dev_reset(adapter->ena_dev,
2621                             adapter->reset_reason);
2622                         if (unlikely(rc != 0))
2623                                 device_printf(adapter->pdev,
2624                                     "Device reset failed\n");
2625                 }
2626
2627                 ena_destroy_all_io_queues(adapter);
2628
2629                 ena_free_all_tx_bufs(adapter);
2630                 ena_free_all_rx_bufs(adapter);
2631                 ena_free_all_tx_resources(adapter);
2632                 ena_free_all_rx_resources(adapter);
2633
2634 #if 0 /* XXX swildner counters */
2635                 counter_u64_add(adapter->dev_stats.interface_down, 1);
2636 #endif
2637         }
2638 }
2639
2640 static void
2641 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
2642 {
2643         struct ena_com_tx_meta *ena_meta;
2644         struct ether_vlan_header *eh;
2645         u32 mss;
2646         bool offload;
2647         uint16_t etype;
2648         int ehdrlen;
2649         struct ip *ip;
2650         int iphlen;
2651         struct tcphdr *th;
2652
2653         offload = false;
2654         ena_meta = &ena_tx_ctx->ena_meta;
2655         mss = mbuf->m_pkthdr.tso_segsz;
2656
2657         if (mss != 0)
2658                 offload = true;
2659
2660         if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2661                 offload = true;
2662
2663         if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
2664                 offload = true;
2665
2666         if (!offload) {
2667                 ena_tx_ctx->meta_valid = 0;
2668                 return;
2669         }
2670
2671         /* Determine where frame payload starts. */
2672         eh = mtod(mbuf, struct ether_vlan_header *);
2673         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2674                 etype = ntohs(eh->evl_proto);
2675                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2676         } else {
2677                 etype = ntohs(eh->evl_encap_proto);
2678                 ehdrlen = ETHER_HDR_LEN;
2679         }
2680
2681         ip = (struct ip *)(mbuf->m_data + ehdrlen);
2682         iphlen = ip->ip_hl << 2;
2683         th = (struct tcphdr *)((caddr_t)ip + iphlen);
2684
2685         if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2686                 ena_tx_ctx->l3_csum_enable = 1;
2687         }
2688         if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
2689                 ena_tx_ctx->tso_enable = 1;
2690                 ena_meta->l4_hdr_len = (th->th_off);
2691         }
2692
2693         switch (etype) {
2694         case ETHERTYPE_IP:
2695                 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2696                 if ((ip->ip_off & htons(IP_DF)) != 0)
2697                         ena_tx_ctx->df = 1;
2698                 break;
2699         case ETHERTYPE_IPV6:
2700                 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2701
2702         default:
2703                 break;
2704         }
2705
2706         if (ip->ip_p == IPPROTO_TCP) {
2707                 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2708                 if ((mbuf->m_pkthdr.csum_flags &
2709                     CSUM_TCP) != 0)
2710                         ena_tx_ctx->l4_csum_enable = 1;
2711                 else
2712                         ena_tx_ctx->l4_csum_enable = 0;
2713         } else if (ip->ip_p == IPPROTO_UDP) {
2714                 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2715                 if ((mbuf->m_pkthdr.csum_flags &
2716                     CSUM_UDP) != 0)
2717                         ena_tx_ctx->l4_csum_enable = 1;
2718                 else
2719                         ena_tx_ctx->l4_csum_enable = 0;
2720         } else {
2721                 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
2722                 ena_tx_ctx->l4_csum_enable = 0;
2723         }
2724
2725         ena_meta->mss = mss;
2726         ena_meta->l3_hdr_len = iphlen;
2727         ena_meta->l3_hdr_offset = ehdrlen;
2728         ena_tx_ctx->meta_valid = 1;
2729 }
2730
2731 static int
2732 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2733 {
2734         struct ena_adapter *adapter;
2735         struct mbuf *collapsed_mbuf;
2736         int num_frags;
2737
2738         adapter = tx_ring->adapter;
2739         num_frags = ena_mbuf_count(*mbuf);
2740
2741         /* One segment must be reserved for configuration descriptor. */
2742         if (num_frags < adapter->max_tx_sgl_size)
2743                 return (0);
2744 #if 0 /* XXX swildner counters */
2745         counter_u64_add(tx_ring->tx_stats.collapse, 1);
2746 #endif
2747
2748         collapsed_mbuf = m_defrag(*mbuf, M_NOWAIT);
2749         if (unlikely(collapsed_mbuf == NULL)) {
2750                 IFNET_STAT_INC(tx_ring->adapter->ifp, oerrors, 1);
2751 #if 0 /* XXX swildner counters */
2752                 counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
2753 #endif
2754                 return (ENOMEM);
2755         }
2756
2757         /* If mbuf was collapsed succesfully, original mbuf is released. */
2758         *mbuf = collapsed_mbuf;
2759
2760         return (0);
2761 }
2762
2763 static int
2764 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2765 {
2766         struct ena_adapter *adapter;
2767         struct ena_tx_buffer *tx_info;
2768         struct ena_com_tx_ctx ena_tx_ctx;
2769         struct ena_com_dev *ena_dev;
2770         struct ena_com_buf *ena_buf;
2771         struct ena_com_io_sq* io_sq;
2772         bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
2773         void *push_hdr;
2774         uint16_t next_to_use;
2775         uint16_t req_id;
2776         uint16_t push_len;
2777         uint16_t ena_qid;
2778         uint32_t len, nsegs, header_len;
2779         int i, rc;
2780         int nb_hw_desc;
2781
2782         ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
2783         adapter = tx_ring->que->adapter;
2784         ena_dev = adapter->ena_dev;
2785         io_sq = &ena_dev->io_sq_queues[ena_qid];
2786
2787         //tx_ring is just used to grab the adapter
2788         rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
2789         if (unlikely(rc != 0)) {
2790                 ena_trace(ENA_WARNING,
2791                     "Failed to collapse mbuf! err: %d", rc);
2792                 return (rc);
2793         }
2794
2795         next_to_use = tx_ring->next_to_use;
2796         req_id = tx_ring->free_tx_ids[next_to_use];
2797         tx_info = &tx_ring->tx_buffer_info[req_id];
2798
2799         tx_info->mbuf = *mbuf;
2800         tx_info->num_of_bufs = 0;
2801
2802         ena_buf = tx_info->bufs;
2803         len = (*mbuf)->m_len;
2804
2805         ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", (*mbuf)->m_pkthdr.len);
2806
2807         push_len = 0;
2808         header_len = min_t(uint32_t, len, tx_ring->tx_max_header_size);
2809         push_hdr = NULL;
2810
2811         rc = bus_dmamap_load_mbuf_segment(adapter->tx_buf_tag, tx_info->map,
2812             *mbuf, segs, adapter->max_tx_sgl_size, &nsegs, BUS_DMA_NOWAIT);
2813
2814         if (unlikely((rc != 0) || (nsegs == 0))) {
2815                 ena_trace(ENA_WARNING,
2816                     "dmamap load failed! err: %d nsegs: %d", rc, nsegs);
2817                 IFNET_STAT_INC(tx_ring->adapter->ifp, oerrors, 1);
2818 #if 0 /* XXX swildner counters */
2819                 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
2820 #endif
2821                 tx_info->mbuf = NULL;
2822                 if (rc == ENOMEM)
2823                         return (ENA_COM_NO_MEM);
2824                 else
2825                         return (ENA_COM_INVAL);
2826         }
2827
2828         for (i = 0; i < nsegs; i++) {
2829                 ena_buf->len = segs[i].ds_len;
2830                 ena_buf->paddr = segs[i].ds_addr;
2831                 ena_buf++;
2832         }
2833         tx_info->num_of_bufs = nsegs;
2834
2835         memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2836         ena_tx_ctx.ena_bufs = tx_info->bufs;
2837         ena_tx_ctx.push_header = push_hdr;
2838         ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2839         ena_tx_ctx.req_id = req_id;
2840         ena_tx_ctx.header_len = header_len;
2841
2842         /* Set flags and meta data */
2843         ena_tx_csum(&ena_tx_ctx, *mbuf);
2844         /* Prepare the packet's descriptors and send them to device */
2845         rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
2846         if (unlikely(rc != 0)) {
2847                 device_printf(adapter->pdev, "failed to prepare tx bufs\n");
2848                 IFNET_STAT_INC(tx_ring->adapter->ifp, oerrors, 1);
2849 #if 0 /* XXX swildner counters */
2850                 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
2851 #endif
2852                 goto dma_error;
2853         }
2854
2855         IFNET_STAT_INC(tx_ring->adapter->ifp, opackets, 1);
2856 #if 0 /* XXX swildner counters */
2857         counter_enter();
2858         counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
2859         counter_u64_add_protected(tx_ring->tx_stats.bytes,
2860             (*mbuf)->m_pkthdr.len);
2861
2862         counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
2863         counter_u64_add_protected(adapter->hw_stats.tx_bytes,
2864             (*mbuf)->m_pkthdr.len);
2865         counter_exit();
2866 #endif
2867
2868         tx_info->tx_descs = nb_hw_desc;
2869         getmicrouptime(&tx_info->timestamp);
2870         tx_info->print_once = true;
2871
2872         tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2873             tx_ring->ring_size);
2874
2875         bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map,
2876             BUS_DMASYNC_PREWRITE);
2877
2878         return (0);
2879
2880 dma_error:
2881         tx_info->mbuf = NULL;
2882         bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
2883
2884         return (rc);
2885 }
2886
2887 static void
2888 ena_start_xmit(struct ifnet *ifp, struct ifaltq_subque *ifsq)
2889 {
2890         /*
2891          * TODO: Might need to initialize an ena_ring with the
2892          *       ifaltq_subque in it
2893          */
2894         struct ena_adapter *adapter = ifp->if_softc;
2895         struct ena_com_io_sq *io_sq;
2896         struct ena_ring *tx_ring;
2897         int ena_qid;
2898         int acum_pkts = 0;
2899         int ret = 0;
2900
2901         if (unlikely((adapter->ifp->if_flags & IFF_RUNNING) == 0) ||
2902             ifsq_is_oactive(ifsq)) {
2903                 return;
2904         }
2905
2906         /* Check is link_active and some other shit. If it is, purge. */
2907
2908 #if 0
2909         if (unlikely(!adapter->link_status))
2910                 return;
2911 #endif
2912
2913         io_sq = NULL;
2914         tx_ring = NULL;
2915
2916         while (!ifsq_is_empty(ifsq)) {
2917                 struct mbuf *m_head;
2918                 int i;
2919
2920                 //Grab head from mbuf list
2921                 m_head = ifsq_dequeue(ifsq);
2922                 if (m_head == NULL)
2923                         break;
2924
2925                 //pick the associated tx_ring based on hash
2926                 i = m_head->m_pkthdr.hash % adapter->num_queues;
2927
2928                 tx_ring = &adapter->tx_ring[i];
2929                 ENA_RING_MTX_LOCK(tx_ring);
2930                 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
2931                 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
2932
2933                 if (unlikely(!ena_com_sq_have_enough_space(io_sq, ENA_TX_CLEANUP_THRESHOLD)))
2934                         ena_tx_cleanup(tx_ring);
2935
2936                 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &m_head)) != 0)) {
2937                         if (ret == ENA_COM_NO_MEM) {
2938                                 /* XXX put mbuf back on queue */
2939                         } else if (ret == ENA_COM_NO_SPACE) {
2940                                 /* XXX put mbuf back on queue */
2941                         } else {
2942                                 m_freem(m_head);
2943                                 /* XXX advance mbuf queue aka move it forward? */
2944                         }
2945                         ENA_RING_MTX_UNLOCK(tx_ring);
2946                         break;
2947                 }
2948
2949                 //advance mbuf queue, might already be handled by dequeue
2950
2951 #if 0
2952                 // dillon - wtf is this doing here?
2953                 // NOT SURE WHAT TO DO WITH THIS CODE
2954                 if (unlikely((adapter->ifp->if_flags & IFF_RUNNING) == 0))
2955                         return; // break here, not return. tx_ring locked
2956 #endif
2957
2958                 acum_pkts++;
2959
2960                 ENA_RING_MTX_UNLOCK(tx_ring);
2961                 BPF_MTAP(adapter->ifp, m_head);
2962
2963                 if (unlikely(acum_pkts == DB_THRESHOLD)) {
2964                         acum_pkts = 0;
2965                         wmb();
2966                         /* Trigger the dma engine */
2967                         ena_com_write_sq_doorbell(io_sq);
2968 #if 0 /* XXX swildner counters */
2969                         counter_u64_add(tx_ring->tx_stats.doorbells, 1);
2970 #endif
2971                 }
2972
2973         }
2974
2975         if (likely(acum_pkts != 0)) {
2976                 wmb();
2977                 /* Trigger the dma engine */
2978                 ena_com_write_sq_doorbell(io_sq);
2979 #if 0 /* XXX swildner counters */
2980                 counter_u64_add(tx_ring->tx_stats.doorbells, 1);
2981 #endif
2982         }
2983
2984         if (io_sq &&
2985             !ena_com_sq_have_enough_space(io_sq, ENA_TX_CLEANUP_THRESHOLD)) {
2986                 ENA_RING_MTX_LOCK(tx_ring);
2987                 ena_tx_cleanup(tx_ring);
2988                 ENA_RING_MTX_UNLOCK(tx_ring);
2989         }
2990 }
2991
2992 static int
2993 ena_calc_io_queue_num(struct ena_adapter *adapter,
2994     struct ena_com_dev_get_features_ctx *get_feat_ctx)
2995 {
2996         int io_sq_num, io_cq_num, io_queue_num;
2997
2998         io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2999         io_cq_num = get_feat_ctx->max_queues.max_cq_num;
3000
3001         io_queue_num = min_t(int, ncpus, ENA_MAX_NUM_IO_QUEUES);
3002         io_queue_num = min_t(int, io_queue_num, io_sq_num);
3003         io_queue_num = min_t(int, io_queue_num, io_cq_num);
3004         /* 1 IRQ for for mgmnt and 1 IRQ for each TX/RX pair */
3005         io_queue_num = min_t(int, io_queue_num,
3006             pci_msix_count(adapter->pdev) - 1);
3007 #ifdef  RSS
3008         io_queue_num = min_t(int, io_queue_num, rss_getnumbuckets());
3009 #endif
3010
3011         return (io_queue_num);
3012 }
3013
3014 static int
3015 ena_calc_queue_size(struct ena_adapter *adapter, uint16_t *max_tx_sgl_size,
3016     uint16_t *max_rx_sgl_size, struct ena_com_dev_get_features_ctx *feat)
3017 {
3018         uint32_t queue_size = ENA_DEFAULT_RING_SIZE;
3019         uint32_t v;
3020         uint32_t q;
3021
3022         queue_size = min_t(uint32_t, queue_size,
3023             feat->max_queues.max_cq_depth);
3024         queue_size = min_t(uint32_t, queue_size,
3025             feat->max_queues.max_sq_depth);
3026
3027         /* round down to the nearest power of 2 */
3028         v = queue_size;
3029         while (v != 0) {
3030                 if (powerof2(queue_size) != 0)
3031                         break;
3032                 v /= 2;
3033                 q = rounddown2(queue_size, v);
3034                 if (q != 0) {
3035                         queue_size = q;
3036                         break;
3037                 }
3038         }
3039
3040         if (unlikely(queue_size == 0)) {
3041                 device_printf(adapter->pdev, "Invalid queue size\n");
3042                 return (ENA_COM_FAULT);
3043         }
3044
3045         *max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3046             feat->max_queues.max_packet_tx_descs);
3047         *max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3048             feat->max_queues.max_packet_rx_descs);
3049
3050         return (queue_size);
3051 }
3052
3053 static int
3054 ena_rss_init_default(struct ena_adapter *adapter)
3055 {
3056         struct ena_com_dev *ena_dev = adapter->ena_dev;
3057         device_t dev = adapter->pdev;
3058         int qid, rc, i;
3059
3060         rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3061         if (unlikely(rc != 0)) {
3062                 device_printf(dev, "Cannot init indirect table\n");
3063                 return (rc);
3064         }
3065
3066         for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3067 #ifdef  RSS
3068                 qid = rss_get_indirection_to_bucket(i);
3069                 qid = qid % adapter->num_queues;
3070 #else
3071                 qid = i % adapter->num_queues;
3072 #endif
3073                 rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3074                     ENA_IO_RXQ_IDX(qid));
3075                 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3076                         device_printf(dev, "Cannot fill indirect table\n");
3077                         goto err_rss_destroy;
3078                 }
3079         }
3080
3081         rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3082             ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3083         if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3084                 device_printf(dev, "Cannot fill hash function\n");
3085                 goto err_rss_destroy;
3086         }
3087
3088         rc = ena_com_set_default_hash_ctrl(ena_dev);
3089         if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3090                 device_printf(dev, "Cannot fill hash control\n");
3091                 goto err_rss_destroy;
3092         }
3093
3094         return (0);
3095
3096 err_rss_destroy:
3097         ena_com_rss_destroy(ena_dev);
3098         return (rc);
3099 }
3100
3101 static void
3102 ena_rss_init_default_deferred(void *arg)
3103 {
3104         struct ena_adapter *adapter;
3105         devclass_t dc;
3106         int max;
3107         int rc;
3108
3109         dc = devclass_find("ena");
3110         if (unlikely(dc == NULL)) {
3111                 ena_trace(ENA_ALERT, "No devclass ena\n");
3112                 return;
3113         }
3114
3115         max = devclass_get_maxunit(dc);
3116         while (max-- >= 0) {
3117                 adapter = devclass_get_softc(dc, max);
3118                 if (adapter != NULL) {
3119                         rc = ena_rss_init_default(adapter);
3120                         adapter->rss_support = true;
3121                         if (unlikely(rc != 0)) {
3122                                 device_printf(adapter->pdev,
3123                                     "WARNING: RSS was not properly initialized,"
3124                                     " it will affect bandwidth\n");
3125                                 adapter->rss_support = false;
3126                         }
3127                 }
3128         }
3129 }
3130 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
3131
3132 static void
3133 ena_config_host_info(struct ena_com_dev *ena_dev)
3134 {
3135         struct ena_admin_host_info *host_info;
3136         int rc;
3137
3138         /* Allocate only the host info */
3139         rc = ena_com_allocate_host_info(ena_dev);
3140         if (unlikely(rc != 0)) {
3141                 ena_trace(ENA_ALERT, "Cannot allocate host info\n");
3142                 return;
3143         }
3144
3145         host_info = ena_dev->host_attr.host_info;
3146
3147         host_info->os_type = ENA_ADMIN_OS_FREEBSD;
3148         host_info->kernel_ver = osreldate;
3149
3150         ksprintf(host_info->kernel_ver_str, "%d", osreldate);
3151         host_info->os_dist = 0;
3152         strncpy(host_info->os_dist_str, osrelease,
3153             sizeof(host_info->os_dist_str) - 1);
3154
3155         host_info->driver_version =
3156                 (DRV_MODULE_VER_MAJOR) |
3157                 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3158                 (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
3159
3160         rc = ena_com_set_host_attributes(ena_dev);
3161         if (unlikely(rc != 0)) {
3162                 if (rc == EOPNOTSUPP)
3163                         ena_trace(ENA_WARNING, "Cannot set host attributes\n");
3164                 else
3165                         ena_trace(ENA_ALERT, "Cannot set host attributes\n");
3166
3167                 goto err;
3168         }
3169
3170         return;
3171
3172 err:
3173         ena_com_delete_host_info(ena_dev);
3174 }
3175
3176 static int
3177 ena_device_init(struct ena_adapter *adapter, device_t pdev,
3178     struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
3179 {
3180         struct ena_com_dev* ena_dev = adapter->ena_dev;
3181         bool readless_supported;
3182         uint32_t aenq_groups;
3183         int dma_width;
3184         int rc;
3185
3186         rc = ena_com_mmio_reg_read_request_init(ena_dev);
3187         if (unlikely(rc != 0)) {
3188                 device_printf(pdev, "failed to init mmio read less\n");
3189                 return (rc);
3190         }
3191
3192         /*
3193          * The PCIe configuration space revision id indicate if mmio reg
3194          * read is disabled
3195          */
3196         readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
3197         ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3198
3199         rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3200         if (unlikely(rc != 0)) {
3201                 device_printf(pdev, "Can not reset device\n");
3202                 goto err_mmio_read_less;
3203         }
3204
3205         rc = ena_com_validate_version(ena_dev);
3206         if (unlikely(rc != 0)) {
3207                 device_printf(pdev, "device version is too low\n");
3208                 goto err_mmio_read_less;
3209         }
3210
3211         dma_width = ena_com_get_dma_width(ena_dev);
3212         if (unlikely(dma_width < 0)) {
3213                 device_printf(pdev, "Invalid dma width value %d", dma_width);
3214                 rc = dma_width;
3215                 goto err_mmio_read_less;
3216         }
3217         adapter->dma_width = dma_width;
3218
3219         /* ENA admin level init */
3220         rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
3221         if (unlikely(rc != 0)) {
3222                 device_printf(pdev,
3223                     "Can not initialize ena admin queue with device\n");
3224                 goto err_mmio_read_less;
3225         }
3226
3227         /*
3228          * To enable the msix interrupts the driver needs to know the number
3229          * of queues. So the driver uses polling mode to retrieve this
3230          * information
3231          */
3232         ena_com_set_admin_polling_mode(ena_dev, true);
3233
3234         ena_config_host_info(ena_dev);
3235
3236         /* Get Device Attributes */
3237         rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3238         if (unlikely(rc != 0)) {
3239                 device_printf(pdev,
3240                     "Cannot get attribute for ena device rc: %d\n", rc);
3241                 goto err_admin_init;
3242         }
3243
3244         aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | BIT(ENA_ADMIN_KEEP_ALIVE);
3245
3246         aenq_groups &= get_feat_ctx->aenq.supported_groups;
3247         rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3248         if (unlikely(rc != 0)) {
3249                 device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
3250                 goto err_admin_init;
3251         }
3252
3253         *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3254
3255         return (0);
3256
3257 err_admin_init:
3258         ena_com_delete_host_info(ena_dev);
3259         ena_com_admin_destroy(ena_dev);
3260 err_mmio_read_less:
3261         ena_com_mmio_reg_read_request_destroy(ena_dev);
3262
3263         return (rc);
3264 }
3265
3266 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
3267     int io_vectors)
3268 {
3269         struct ena_com_dev *ena_dev = adapter->ena_dev;
3270         int rc;
3271
3272         rc = ena_enable_msix(adapter);
3273         if (unlikely(rc != 0)) {
3274                 device_printf(adapter->pdev, "Error with MSI-X enablement\n");
3275                 return (rc);
3276         }
3277
3278         ena_setup_mgmnt_intr(adapter);
3279
3280         rc = ena_request_mgmnt_irq(adapter);
3281         if (unlikely(rc != 0)) {
3282                 device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
3283                 goto err_disable_msix;
3284         }
3285
3286         pci_enable_msix(adapter->pdev);
3287
3288         ena_com_set_admin_polling_mode(ena_dev, false);
3289
3290         ena_com_admin_aenq_enable(ena_dev);
3291
3292         return (0);
3293
3294 err_disable_msix:
3295         ena_disable_msix(adapter);
3296
3297         return (rc);
3298 }
3299
3300 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
3301 static void ena_keep_alive_wd(void *adapter_data,
3302     struct ena_admin_aenq_entry *aenq_e)
3303 {
3304         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3305         struct ena_admin_aenq_keep_alive_desc *desc;
3306         struct timeval time;
3307         uint64_t rx_drops;
3308
3309         desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3310
3311         rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
3312         IFNET_STAT_INC(adapter->ifp, iqdrops, 1);
3313 #if 0 /* XXX swildner counters */
3314         counter_u64_zero(adapter->hw_stats.rx_drops);
3315         counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
3316 #endif
3317
3318         getmicrouptime(&time);
3319         atomic_store_rel_64(&adapter->keep_alive_timestamp.tv_sec, time.tv_sec);
3320 }
3321
3322 /* Check for keep alive expiration */
3323 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3324 {
3325         struct timeval timestamp, time;
3326
3327         if (adapter->wd_active == 0)
3328                 return;
3329
3330         if (likely(adapter->keep_alive_timeout == 0))
3331                 return;
3332
3333         timestamp.tv_sec = atomic_load_acq_64(&adapter->keep_alive_timestamp.tv_sec);
3334         getmicrouptime(&time);
3335         timevalsub(&time, &timestamp);
3336         if (unlikely(time.tv_sec > adapter->keep_alive_timeout)) {
3337                 device_printf(adapter->pdev,
3338                     "Keep alive watchdog timeout.\n");
3339 #if 0 /* XXX swildner counters */
3340                 counter_u64_add(adapter->dev_stats.wd_expired, 1);
3341 #endif
3342                 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3343                 adapter->trigger_reset = true;
3344         }
3345 }
3346
3347 /* Check if admin queue is enabled */
3348 static void check_for_admin_com_state(struct ena_adapter *adapter)
3349 {
3350         if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
3351             false)) {
3352                 device_printf(adapter->pdev,
3353                     "ENA admin queue is not in running state!\n");
3354 #if 0 /* XXX swildner counters */
3355                 counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3356 #endif
3357                 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3358                 adapter->trigger_reset = true;
3359         }
3360 }
3361
3362 static int
3363 check_missing_comp_in_queue(struct ena_adapter *adapter,
3364     struct ena_ring *tx_ring)
3365 {
3366         struct timeval curtime, time;
3367         struct ena_tx_buffer *tx_buf;
3368         uint32_t missed_tx = 0;
3369         int i;
3370
3371         getmicrouptime(&curtime);
3372
3373         for (i = 0; i < tx_ring->ring_size; i++) {
3374                 tx_buf = &tx_ring->tx_buffer_info[i];
3375
3376                 if (timevalisset(&tx_buf->timestamp) == 0)
3377                         continue;
3378
3379                 time = curtime;
3380                 timevalsub(&time, &tx_buf->timestamp);
3381
3382                 /* Check again if packet is still waiting */
3383                 //WATCH: Might not be exactly comparable
3384                 if (unlikely(time.tv_sec > adapter->missing_tx_timeout)) {
3385
3386                         if (!tx_buf->print_once)
3387                                 ena_trace(ENA_WARNING, "Found a Tx that wasn't "
3388                                     "completed on time, qid %d, index %d.\n",
3389                                     tx_ring->qid, i);
3390
3391                         tx_buf->print_once = true;
3392                         missed_tx++;
3393 #if 0 /* XXX swildner counters */
3394                         counter_u64_add(tx_ring->tx_stats.missing_tx_comp, 1);
3395 #endif
3396
3397                         if (unlikely(missed_tx >
3398                             adapter->missing_tx_threshold)) {
3399                                 device_printf(adapter->pdev,
3400                                     "The number of lost tx completion "
3401                                     "is above the threshold (%d > %d). "
3402                                     "Reset the device\n",
3403                                     missed_tx, adapter->missing_tx_threshold);
3404                                 adapter->reset_reason =
3405                                     ENA_REGS_RESET_MISS_TX_CMPL;
3406                                 adapter->trigger_reset = true;
3407                                 return (EIO);
3408                         }
3409                 }
3410         }
3411
3412         return (0);
3413 }
3414
3415 /*
3416  * Check for TX which were not completed on time.
3417  * Timeout is defined by "missing_tx_timeout".
3418  * Reset will be performed if number of incompleted
3419  * transactions exceeds "missing_tx_threshold".
3420  */
3421 static void
3422 check_for_missing_tx_completions(struct ena_adapter *adapter)
3423 {
3424         struct ena_ring *tx_ring;
3425         int i, budget, rc;
3426
3427         /* Make sure the driver doesn't turn the device in other process */
3428         rmb();
3429
3430         if (!adapter->up)
3431                 return;
3432
3433         if (adapter->trigger_reset)
3434                 return;
3435
3436         if (adapter->missing_tx_timeout == 0)
3437                 return;
3438
3439         budget = adapter->missing_tx_max_queues;
3440
3441         for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) {
3442                 tx_ring = &adapter->tx_ring[i];
3443
3444                 rc = check_missing_comp_in_queue(adapter, tx_ring);
3445                 if (unlikely(rc != 0))
3446                         return;
3447
3448                 budget--;
3449                 if (budget == 0) {
3450                         i++;
3451                         break;
3452                 }
3453         }
3454
3455         adapter->next_monitored_tx_qid = i % adapter->num_queues;
3456 }
3457
3458 /* trigger deferred rx cleanup after 2 consecutive detections */
3459 #define EMPTY_RX_REFILL 2
3460 /* For the rare case where the device runs out of Rx descriptors and the
3461  * msix handler failed to refill new Rx descriptors (due to a lack of memory
3462  * for example).
3463  * This case will lead to a deadlock:
3464  * The device won't send interrupts since all the new Rx packets will be dropped
3465  * The msix handler won't allocate new Rx descriptors so the device won't be
3466  * able to send new packets.
3467  *
3468  * When such a situation is detected - execute rx cleanup task in another thread
3469  */
3470 static void
3471 check_for_empty_rx_ring(struct ena_adapter *adapter)
3472 {
3473         struct ena_ring *rx_ring;
3474         int i, refill_required;
3475
3476         if (!adapter->up)
3477                 return;
3478
3479         if (adapter->trigger_reset)
3480                 return;
3481
3482         for (i = 0; i < adapter->num_queues; i++) {
3483                 rx_ring = &adapter->rx_ring[i];
3484
3485                 refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
3486                 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3487                         rx_ring->empty_rx_queue++;
3488
3489                         if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3490 #if 0 /* XXX swildner counters */
3491                                 counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3492                                     1);
3493 #endif
3494
3495                                 device_printf(adapter->pdev,
3496                                     "trigger refill for ring %d\n", i);
3497
3498                                 taskqueue_enqueue(rx_ring->cmpl_tq,
3499                                     &rx_ring->cmpl_task);
3500                                 rx_ring->empty_rx_queue = 0;
3501                         }
3502                 } else {
3503                         rx_ring->empty_rx_queue = 0;
3504                 }
3505         }
3506 }
3507
3508 static void
3509 ena_timer_service(void *data)
3510 {
3511         struct ena_adapter *adapter = (struct ena_adapter *)data;
3512         struct ena_admin_host_info *host_info =
3513             adapter->ena_dev->host_attr.host_info;
3514
3515         check_for_missing_keep_alive(adapter);
3516
3517         check_for_admin_com_state(adapter);
3518
3519         check_for_missing_tx_completions(adapter);
3520
3521         check_for_empty_rx_ring(adapter);
3522
3523         if (host_info != NULL)
3524                 ena_update_host_info(host_info, adapter->ifp);
3525
3526         if (unlikely(adapter->trigger_reset)) {
3527                 device_printf(adapter->pdev, "Trigger reset is on\n");
3528                 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3529                 return;
3530         }
3531
3532         /*
3533          * Schedule another timeout one second from now.
3534          */
3535         /* XXX swildner callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0); */
3536         callout_reset(&adapter->timer_service, hz, ena_timer_service,
3537             (void *)adapter);
3538 }
3539
3540 static void
3541 ena_reset_task(void *arg, int pending)
3542 {
3543         struct ena_com_dev_get_features_ctx get_feat_ctx;
3544         struct ena_adapter *adapter = (struct ena_adapter *)arg;
3545         struct ena_com_dev *ena_dev = adapter->ena_dev;
3546         bool dev_up;
3547         int rc;
3548
3549         if (unlikely(!adapter->trigger_reset)) {
3550                 device_printf(adapter->pdev,
3551                     "device reset scheduled but trigger_reset is off\n");
3552                 return;
3553         }
3554
3555         lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
3556
3557         callout_drain(&adapter->timer_service);
3558
3559         dev_up = adapter->up;
3560
3561         ena_com_set_admin_running_state(ena_dev, false);
3562         ena_down(adapter);
3563         ena_free_mgmnt_irq(adapter);
3564         ena_disable_msix(adapter);
3565         ena_com_abort_admin_commands(ena_dev);
3566         ena_com_wait_for_abort_completion(ena_dev);
3567         ena_com_admin_destroy(ena_dev);
3568         ena_com_mmio_reg_read_request_destroy(ena_dev);
3569
3570         adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3571         adapter->trigger_reset = false;
3572
3573         /* Finished destroy part. Restart the device */
3574         rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx,
3575             &adapter->wd_active);
3576         if (unlikely(rc != 0)) {
3577                 device_printf(adapter->pdev,
3578                     "ENA device init failed! (err: %d)\n", rc);
3579                 goto err_dev_free;
3580         }
3581
3582         rc = ena_enable_msix_and_set_admin_interrupts(adapter,
3583             adapter->num_queues);
3584         if (unlikely(rc != 0)) {
3585                 device_printf(adapter->pdev, "Enable MSI-X failed\n");
3586                 goto err_com_free;
3587         }
3588
3589         /* If the interface was up before the reset bring it up */
3590         if (dev_up) {
3591                 rc = ena_up(adapter);
3592                 if (unlikely(rc != 0)) {
3593                         device_printf(adapter->pdev,
3594                             "Failed to create I/O queues\n");
3595                         goto err_msix_free;
3596                 }
3597         }
3598
3599         callout_reset(&adapter->timer_service, hz,
3600             ena_timer_service, (void *)adapter);
3601
3602         lockmgr(&adapter->ioctl_lock, LK_RELEASE);
3603
3604         return;
3605
3606 err_msix_free:
3607         ena_free_mgmnt_irq(adapter);
3608         ena_disable_msix(adapter);
3609 err_com_free:
3610         ena_com_admin_destroy(ena_dev);
3611 err_dev_free:
3612         device_printf(adapter->pdev, "ENA reset failed!\n");
3613         adapter->running = false;
3614         lockmgr(&adapter->ioctl_lock, LK_RELEASE);
3615 }
3616
3617 /**
3618  * ena_attach - Device Initialization Routine
3619  * @pdev: device information struct
3620  *
3621  * Returns 0 on success, otherwise on failure.
3622  *
3623  * ena_attach initializes an adapter identified by a device structure.
3624  * The OS initialization, configuring of the adapter private structure,
3625  * and a hardware reset occur.
3626  **/
3627 static int
3628 ena_attach(device_t pdev)
3629 {
3630         struct ena_com_dev_get_features_ctx get_feat_ctx;
3631         static int version_printed;
3632         struct ena_adapter *adapter;
3633         struct ena_com_dev *ena_dev = NULL;
3634         uint16_t tx_sgl_size = 0;
3635         uint16_t rx_sgl_size = 0;
3636         int io_queue_num;
3637         int queue_size;
3638         int rc;
3639         adapter = device_get_softc(pdev);
3640         adapter->pdev = pdev;
3641
3642         lockinit(&adapter->global_lock, "ENA global mtx", 0, LK_CANRECURSE);
3643         lockinit(&adapter->ioctl_lock, "ENA ioctl sx", 0, LK_CANRECURSE);
3644
3645         /* Set up the timer service */
3646         callout_init_lk(&adapter->timer_service, &adapter->global_lock);
3647         adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
3648         adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
3649         adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
3650         adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
3651
3652         if (version_printed++ == 0)
3653                 device_printf(pdev, "%s\n", ena_version);
3654
3655         rc = ena_allocate_pci_resources(adapter);
3656         if (unlikely(rc != 0)) {
3657                 device_printf(pdev, "PCI resource allocation failed!\n");
3658                 ena_free_pci_resources(adapter);
3659                 return (rc);
3660         }
3661
3662         /* Allocate memory for ena_dev structure */
3663         ena_dev = kmalloc(sizeof(struct ena_com_dev), M_DEVBUF,
3664             M_WAITOK | M_ZERO);
3665
3666         adapter->ena_dev = ena_dev;
3667         ena_dev->dmadev = pdev;
3668         ena_dev->bus = kmalloc(sizeof(struct ena_bus), M_DEVBUF,
3669             M_WAITOK | M_ZERO);
3670
3671         /* Store register resources */
3672         ((struct ena_bus*)(ena_dev->bus))->reg_bar_t =
3673             rman_get_bustag(adapter->registers);
3674         ((struct ena_bus*)(ena_dev->bus))->reg_bar_h =
3675             rman_get_bushandle(adapter->registers);
3676
3677         if (unlikely(((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0)) {
3678                 device_printf(pdev, "failed to pmap registers bar\n");
3679                 rc = ENXIO;
3680                 goto err_bus_free;
3681         }
3682
3683         ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3684
3685         /* Device initialization */
3686         rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3687         if (unlikely(rc != 0)) {
3688                 device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
3689                 rc = ENXIO;
3690                 goto err_bus_free;
3691         }
3692
3693         getmicrouptime(&adapter->keep_alive_timestamp);
3694
3695         adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3696
3697         /* Set for sure that interface is not up */
3698         adapter->up = false;
3699
3700         memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3701             ETHER_ADDR_LEN);
3702
3703         /* calculate IO queue number to create */
3704         io_queue_num = ena_calc_io_queue_num(adapter, &get_feat_ctx);
3705
3706         ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n",
3707             io_queue_num);
3708         adapter->num_queues = io_queue_num;
3709
3710         adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3711
3712         /* calculatre ring sizes */
3713         queue_size = ena_calc_queue_size(adapter,&tx_sgl_size,
3714             &rx_sgl_size, &get_feat_ctx);
3715         if (unlikely((queue_size <= 0) || (io_queue_num <= 0))) {
3716                 rc = ENA_COM_FAULT;
3717                 goto err_com_free;
3718         }
3719
3720         adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3721
3722         adapter->tx_ring_size = queue_size;
3723         adapter->rx_ring_size = queue_size;
3724
3725         adapter->max_tx_sgl_size = tx_sgl_size;
3726         adapter->max_rx_sgl_size = rx_sgl_size;
3727
3728         /* set up dma tags for rx and tx buffers */
3729         rc = ena_setup_tx_dma_tag(adapter);
3730         if (unlikely(rc != 0)) {
3731                 device_printf(pdev, "Failed to create TX DMA tag\n");
3732                 goto err_com_free;
3733         }
3734
3735         rc = ena_setup_rx_dma_tag(adapter);
3736         if (unlikely(rc != 0)) {
3737                 device_printf(pdev, "Failed to create RX DMA tag\n");
3738                 goto err_tx_tag_free;
3739         }
3740
3741         /* initialize rings basic information */
3742         device_printf(pdev, "initialize %d io queues\n", io_queue_num);
3743         ena_init_io_rings(adapter);
3744
3745         /* setup network interface */
3746         rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3747         if (unlikely(rc != 0)) {
3748                 device_printf(pdev, "Error with network interface setup\n");
3749                 goto err_io_free;
3750         }
3751
3752         rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
3753         if (unlikely(rc != 0)) {
3754                 device_printf(pdev,
3755                     "Failed to enable and set the admin interrupts\n");
3756                 goto err_ifp_free;
3757         }
3758
3759         /* Initialize reset task queue */
3760         TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3761         adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3762             M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3763         taskqueue_start_threads(&adapter->reset_tq, 1, TDPRI_KERN_DAEMON, -1,
3764             "%s rstq", device_get_nameunit(adapter->pdev));
3765
3766         /* Initialize statistics */
3767 #if 0 /* XXX swildner counters */
3768         ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3769             sizeof(struct ena_stats_dev));
3770         ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
3771             sizeof(struct ena_hw_stats));
3772 #endif
3773         ena_sysctl_add_nodes(adapter);
3774
3775         /* Tell the stack that the interface is not active */
3776         ifq_set_oactive(&adapter->ifp->if_snd);
3777         adapter->ifp->if_flags &= ~IFF_RUNNING;
3778
3779         adapter->running = true;
3780         return (0);
3781
3782 err_ifp_free:
3783         if_detach(adapter->ifp);
3784         if_free(adapter->ifp);
3785 err_io_free:
3786         ena_free_all_io_rings_resources(adapter);
3787         ena_free_rx_dma_tag(adapter);
3788 err_tx_tag_free:
3789         ena_free_tx_dma_tag(adapter);
3790 err_com_free:
3791         ena_com_admin_destroy(ena_dev);
3792         ena_com_delete_host_info(ena_dev);
3793         ena_com_mmio_reg_read_request_destroy(ena_dev);
3794 err_bus_free:
3795         kfree(ena_dev->bus, M_DEVBUF);
3796         kfree(ena_dev, M_DEVBUF);
3797         ena_free_pci_resources(adapter);
3798
3799         return (rc);
3800 }
3801
3802 /**
3803  * ena_detach - Device Removal Routine
3804  * @pdev: device information struct
3805  *
3806  * ena_detach is called by the device subsystem to alert the driver
3807  * that it should release a PCI device.
3808  **/
3809 static int
3810 ena_detach(device_t pdev)
3811 {
3812         struct ena_adapter *adapter = device_get_softc(pdev);
3813         struct ena_com_dev *ena_dev = adapter->ena_dev;
3814         int rc;
3815
3816         /* Make sure VLANS are not using driver */
3817         if (adapter->ifp->if_vlantrunks != NULL) {
3818                 device_printf(adapter->pdev ,"VLAN is in use, detach first\n");
3819                 return (EBUSY);
3820         }
3821
3822         /* Free reset task and callout */
3823         callout_drain(&adapter->timer_service);
3824         while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3825                 taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3826         taskqueue_free(adapter->reset_tq);
3827
3828         lockmgr(&adapter->ioctl_lock, LK_EXCLUSIVE);
3829         ena_down(adapter);
3830         lockmgr(&adapter->ioctl_lock, LK_RELEASE);
3831
3832         if (adapter->ifp != NULL) {
3833                 ether_ifdetach(adapter->ifp);
3834                 if_free(adapter->ifp);
3835         }
3836
3837         ena_free_all_io_rings_resources(adapter);
3838
3839 #if 0 /* XXX swildner counters */
3840         ena_free_counters((counter_u64_t *)&adapter->hw_stats,
3841             sizeof(struct ena_hw_stats));
3842         ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3843             sizeof(struct ena_stats_dev));
3844 #endif
3845
3846         if (likely(adapter->rss_support))
3847                 ena_com_rss_destroy(ena_dev);
3848
3849         rc = ena_free_rx_dma_tag(adapter);
3850         if (unlikely(rc != 0))
3851                 device_printf(adapter->pdev,
3852                     "Unmapped RX DMA tag associations\n");
3853
3854         rc = ena_free_tx_dma_tag(adapter);
3855         if (unlikely(rc != 0))
3856                 device_printf(adapter->pdev,
3857                     "Unmapped TX DMA tag associations\n");
3858
3859         /* Reset the device only if the device is running. */
3860         if (adapter->running)
3861                 ena_com_dev_reset(ena_dev, adapter->reset_reason);
3862
3863         ena_com_delete_host_info(ena_dev);
3864
3865         ena_free_irqs(adapter);
3866
3867         ena_com_abort_admin_commands(ena_dev);
3868
3869         ena_com_wait_for_abort_completion(ena_dev);
3870
3871         ena_com_admin_destroy(ena_dev);
3872
3873         ena_com_mmio_reg_read_request_destroy(ena_dev);
3874
3875         ena_free_pci_resources(adapter);
3876
3877         lockuninit(&adapter->global_lock);
3878         lockuninit(&adapter->ioctl_lock);
3879
3880         if (ena_dev->bus != NULL)
3881                 kfree(ena_dev->bus, M_DEVBUF);
3882
3883         if (ena_dev != NULL)
3884                 kfree(ena_dev, M_DEVBUF);
3885
3886         return (bus_generic_detach(pdev));
3887 }
3888
3889 /******************************************************************************
3890  ******************************** AENQ Handlers *******************************
3891  *****************************************************************************/
3892 /**
3893  * ena_update_on_link_change:
3894  * Notify the network interface about the change in link status
3895  **/
3896 static void
3897 ena_update_on_link_change(void *adapter_data,
3898     struct ena_admin_aenq_entry *aenq_e)
3899 {
3900         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3901         struct ena_admin_aenq_link_change_desc *aenq_desc;
3902         int status;
3903         if_t ifp;
3904
3905         aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3906         ifp = adapter->ifp;
3907         status = aenq_desc->flags &
3908             ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3909
3910         if (status != 0) {
3911                 device_printf(adapter->pdev, "link is UP\n");
3912                 ifp->if_link_state = LINK_STATE_UP;
3913                 if_link_state_change(ifp);
3914         } else if (status == 0) {
3915                 device_printf(adapter->pdev, "link is DOWN\n");
3916                 ifp->if_link_state = LINK_STATE_DOWN;
3917                 if_link_state_change(ifp);
3918         } else {
3919                 device_printf(adapter->pdev, "invalid value recvd\n");
3920                 BUG();
3921         }
3922
3923         adapter->link_status = status;
3924 }
3925
/**
 * Fallback AENQ handler: it is called for unknown event groups and for
 * events without an implemented handler.  It intentionally does nothing.
 **/
static void
unimplemented_aenq_handler(void *data,
    struct ena_admin_aenq_entry *aenq_e)
{
}
3935
3936 static struct ena_aenq_handlers aenq_handlers = {
3937     .handlers = {
3938             [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3939             [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3940     },
3941     .unimplemented_handler = unimplemented_aenq_handler
3942 };
3943
3944 /*********************************************************************
3945  *  FreeBSD Device Interface Entry Points
3946  *********************************************************************/
3947
3948 static device_method_t ena_methods[] = {
3949     /* Device interface */
3950     DEVMETHOD(device_probe, ena_probe),
3951     DEVMETHOD(device_attach, ena_attach),
3952     DEVMETHOD(device_detach, ena_detach),
3953     DEVMETHOD_END
3954 };
3955
3956 static driver_t ena_driver = {
3957     "ena", ena_methods, sizeof(struct ena_adapter),
3958 };
3959
3960 devclass_t ena_devclass;
3961 DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, NULL, NULL);
3962 MODULE_DEPEND(ena, pci, 1, 1, 1);
3963 MODULE_DEPEND(ena, ether, 1, 1, 1);
3964
3965 /*********************************************************************/