06063682d3966d48c7c99df63c0d89c1365a1952
[dragonfly.git] / sys / dev / netif / e1000 / if_em.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33
34 #ifdef HAVE_KERNEL_OPTION_HEADERS
35 #include "opt_device_polling.h"
36 #include "opt_inet.h"
37 #include "opt_altq.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #if __FreeBSD_version >= 800000
43 #include <sys/buf_ring.h>
44 #endif
45 #include <sys/bus.h>
46 #include <sys/endian.h>
47 #include <sys/kernel.h>
48 #include <sys/kthread.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/rman.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/taskqueue.h>
57 #include <sys/eventhandler.h>
58
59 #include <net/bpf.h>
60 #include <net/ethernet.h>
61 #include <net/if.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_media.h>
65 #include <net/ifq_var.h>
66
67 #include <net/if_types.h>
68 #include <net/vlan/if_vlan_var.h>
69 #include <net/vlan/if_vlan_ether.h>
70
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip6.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78
79 #include <sys/in_cksum.h>
80 #include <bus/pci/pcivar.h>
81 #include <bus/pci/pcireg.h>
82
83 #include "e1000_api.h"
84 #include "e1000_82571.h"
85 #include "if_em.h"
86
/*
 * Debug statistics switch: off (0) by default; set this to one to
 * display debug statistics.
 */
int em_display_debug_stats = 0;
91
/*
 * Driver version string; em_probe() appends it to the branding string
 * when it builds the device description.
 */
char em_driver_version[] = "6.9.25";
96
97
98 /*********************************************************************
99  *  PCI Device ID Table
100  *
101  *  Used by probe to select devices to load on
102  *  Last field stores an index into e1000_strings
103  *  Last entry must be all 0s
104  *
105  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
106  *********************************************************************/
107
108 static em_vendor_info_t em_vendor_info_array[] =
109 {
110         /* Intel(R) PRO/1000 Network Connection */
111         { 0x8086, E1000_DEV_ID_82540EM,         PCI_ANY_ID, PCI_ANY_ID, 0},
112         { 0x8086, E1000_DEV_ID_82540EM_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
113         { 0x8086, E1000_DEV_ID_82540EP,         PCI_ANY_ID, PCI_ANY_ID, 0},
114         { 0x8086, E1000_DEV_ID_82540EP_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
115         { 0x8086, E1000_DEV_ID_82540EP_LP,      PCI_ANY_ID, PCI_ANY_ID, 0},
116
117         { 0x8086, E1000_DEV_ID_82541EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82541ER,         PCI_ANY_ID, PCI_ANY_ID, 0},
119         { 0x8086, E1000_DEV_ID_82541ER_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82541EI_MOBILE,  PCI_ANY_ID, PCI_ANY_ID, 0},
121         { 0x8086, E1000_DEV_ID_82541GI,         PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82541GI_LF,      PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82541GI_MOBILE,  PCI_ANY_ID, PCI_ANY_ID, 0},
124
125         { 0x8086, E1000_DEV_ID_82542,           PCI_ANY_ID, PCI_ANY_ID, 0},
126
127         { 0x8086, E1000_DEV_ID_82543GC_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
128         { 0x8086, E1000_DEV_ID_82543GC_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
129
130         { 0x8086, E1000_DEV_ID_82544EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82544EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82544GC_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82544GC_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
134
135         { 0x8086, E1000_DEV_ID_82545EM_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82545EM_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82545GM_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_82545GM_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
139         { 0x8086, E1000_DEV_ID_82545GM_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
140
141         { 0x8086, E1000_DEV_ID_82546EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
142         { 0x8086, E1000_DEV_ID_82546EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
143         { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_82546GB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_82546GB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_82546GB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_82546GB_PCIE,    PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
150                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
151
152         { 0x8086, E1000_DEV_ID_82547EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_82547EI_MOBILE,  PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_82547GI,         PCI_ANY_ID, PCI_ANY_ID, 0},
155
156         { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
157         { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
160                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
162                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
163         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
164                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
165         { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
166                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
167         { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
168                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
169         { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
170                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
171         { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
172         { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
173         { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
174         { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
175
176         { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
177         { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
178         { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
179         { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
180         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
181                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
182         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
183                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
184         { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
185                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
186         { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
187                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
188         { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
189         { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
190         { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
191         { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
192         { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
193         { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
194         { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
195         { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
196         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
197         { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
198         { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
199         { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
200         { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
201         { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
202         { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
203         { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
204         { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
205         { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
206         { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
207         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
208         { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
209         { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
210         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
211         { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
212         { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
213         { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
214         { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
215         { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
216         /* required last entry */
217         { 0, 0, 0, 0, 0}
218 };
219
/*********************************************************************
 *  Branding strings for all supported NICs, selected by the index
 *  field of each em_vendor_info_array entry.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection",
};
227
228 /*********************************************************************
229  *  Function prototypes
230  *********************************************************************/
231 static int      em_probe(device_t);
232 static int      em_attach(device_t);
233 static int      em_detach(device_t);
234 static int      em_shutdown(device_t);
235 static int      em_suspend(device_t);
236 static int      em_resume(device_t);
237 static void     em_start(struct ifnet *);
238 static void     em_start_locked(struct ifnet *ifp);
239 #if __FreeBSD_version >= 800000
240 static int      em_mq_start(struct ifnet *, struct mbuf *);
241 static int      em_mq_start_locked(struct ifnet *, struct mbuf *);
242 static void     em_qflush(struct ifnet *);
243 #endif
244 static int      em_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
245 static void     em_init(void *);
246 static void     em_init_locked(struct adapter *);
247 static void     em_stop(void *);
248 static void     em_media_status(struct ifnet *, struct ifmediareq *);
249 static int      em_media_change(struct ifnet *);
250 static void     em_identify_hardware(struct adapter *);
251 static int      em_allocate_pci_resources(struct adapter *);
252 static int      em_allocate_legacy(struct adapter *adapter);
253 static int      em_allocate_msix(struct adapter *adapter);
254 static int      em_setup_msix(struct adapter *);
255 static void     em_free_pci_resources(struct adapter *);
256 static void     em_local_timer(void *);
257 static int      em_hardware_init(struct adapter *);
258 static void     em_setup_interface(device_t, struct adapter *);
259 static void     em_setup_transmit_structures(struct adapter *);
260 static void     em_initialize_transmit_unit(struct adapter *);
261 static int      em_setup_receive_structures(struct adapter *);
262 static void     em_initialize_receive_unit(struct adapter *);
263 static void     em_enable_intr(struct adapter *);
264 static void     em_disable_intr(struct adapter *);
265 static void     em_free_transmit_structures(struct adapter *);
266 static void     em_free_receive_structures(struct adapter *);
267 static void     em_update_stats_counters(struct adapter *);
268 static void     em_txeof(struct adapter *);
269 static void     em_tx_purge(struct adapter *);
270 static int      em_allocate_receive_structures(struct adapter *);
271 static int      em_allocate_transmit_structures(struct adapter *);
272 static int      em_rxeof(struct adapter *, int);
273 #ifndef __NO_STRICT_ALIGNMENT
274 static int      em_fixup_rx(struct adapter *);
275 #endif
276 static void     em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
277                     struct mbuf *);
278 static void     em_transmit_checksum_setup(struct adapter *, struct mbuf *,
279                     u32 *, u32 *);
280 #ifdef NET_TSO
281 static bool     em_tso_setup(struct adapter *, struct mbuf *,
282                     u32 *, u32 *);
283 #endif
284 static void     em_set_promisc(struct adapter *);
285 static void     em_disable_promisc(struct adapter *);
286 static void     em_set_multi(struct adapter *);
287 static void     em_print_hw_stats(struct adapter *);
288 static void     em_update_link_status(struct adapter *);
289 static int      em_get_buf(struct adapter *, int);
290
291 static void     em_register_vlan(void *, struct ifnet *, u16);
292 static void     em_unregister_vlan(void *, struct ifnet *, u16);
293 static void     em_setup_vlan_hw_support(struct adapter *);
294
295 static int      em_xmit(struct adapter *, struct mbuf **);
296 static void     em_smartspeed(struct adapter *);
297 static int      em_82547_fifo_workaround(struct adapter *, int);
298 static void     em_82547_update_fifo_head(struct adapter *, int);
299 static int      em_82547_tx_fifo_reset(struct adapter *);
300 static void     em_82547_move_tail(void *);
301 static int      em_dma_malloc(struct adapter *, bus_size_t,
302                     struct em_dma_alloc *, int);
303 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
304 static void     em_print_debug_info(struct adapter *);
305 static void     em_print_nvm_info(struct adapter *);
306 static int      em_is_valid_ether_addr(u8 *);
307 static int      em_sysctl_stats(SYSCTL_HANDLER_ARGS);
308 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
309 static u32      em_fill_descriptors (bus_addr_t address, u32 length,
310                     PDESC_ARRAY desc_array);
311 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
312 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
313                     const char *, struct em_int_delay_info *, int, int);
314 /* Management and WOL Support */
315 static void     em_init_manageability(struct adapter *);
316 static void     em_release_manageability(struct adapter *);
317 static void     em_get_hw_control(struct adapter *);
318 static void     em_release_hw_control(struct adapter *);
319 static void     em_get_wakeup(device_t);
320 static void     em_enable_wakeup(device_t);
321 static int      em_enable_phy_wakeup(struct adapter *);
322
323 #ifdef EM_LEGACY_IRQ
324 static void     em_intr(void *);
325 #else /* FAST IRQ */
326 static void     em_irq_fast(void *);
327
328 /* MSIX handlers */
329 static void     em_msix_tx(void *);
330 static void     em_msix_rx(void *);
331 static void     em_msix_link(void *);
332 static void     em_handle_rx(void *context, int pending);
333 static void     em_handle_tx(void *context, int pending);
334
335 static void     em_handle_rxtx(void *context, int pending);
336 static void     em_handle_link(void *context, int pending);
337 static void     em_add_rx_process_limit(struct adapter *, const char *,
338                     const char *, int *, int);
339 #endif /* ~EM_LEGACY_IRQ */
340
341 #ifdef DEVICE_POLLING
342 static poll_handler_t em_poll;
343 #endif /* POLLING */
344
345 /*********************************************************************
346  *  FreeBSD Device Interface Entry Points
347  *********************************************************************/
348
349 static device_method_t em_methods[] = {
350         /* Device interface */
351         DEVMETHOD(device_probe, em_probe),
352         DEVMETHOD(device_attach, em_attach),
353         DEVMETHOD(device_detach, em_detach),
354         DEVMETHOD(device_shutdown, em_shutdown),
355         DEVMETHOD(device_suspend, em_suspend),
356         DEVMETHOD(device_resume, em_resume),
357         {0, 0}
358 };
359
360 static driver_t em_driver = {
361         "em", em_methods, sizeof(struct adapter),
362 };
363
364 static devclass_t em_devclass;
365 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
366 MODULE_DEPEND(em, pci, 1, 1, 1);
367 MODULE_DEPEND(em, ether, 1, 1, 1);
368
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/*
 * Conversions between delay "ticks" and microseconds.  The 1024:1000
 * ratio treats one tick as 1.024 usecs; the +500 and +512 terms make
 * the integer division round to nearest instead of truncating.
 */
#define EM_TICKS_TO_USECS(ticks)        (((ticks) * 1024 + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        (((usecs) * 1000 + 512) / 1024)
#define M_TSO_LEN                       66

/* Let common code reference CSUM_TSO even when TSO is not available */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif
381
382 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
383 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
384 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
385 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
386 static int em_rxd = EM_DEFAULT_RXD;
387 static int em_txd = EM_DEFAULT_TXD;
388 static int em_smart_pwr_down = FALSE;
389 /* Controls whether promiscuous also shows bad packets */
390 static int em_debug_sbp = FALSE;
391 /* Local switch for MSI/MSIX */
392 static int em_enable_msi = TRUE;
393
394 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
395 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
396 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
397 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
398 TUNABLE_INT("hw.em.rxd", &em_rxd);
399 TUNABLE_INT("hw.em.txd", &em_txd);
400 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
401 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
402 TUNABLE_INT("hw.em.enable_msi", &em_enable_msi);
403
404 #ifndef EM_LEGACY_IRQ
405 /* How many packets rxeof tries to clean at a time */
406 static int em_rx_process_limit = 100;
407 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
408 #endif
409
410 /* Flow control setting - default to FULL */
411 static int em_fc_setting = e1000_fc_full;
412 TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
413
414 /*
415 ** Shadow VFTA table, this is needed because
416 ** the real vlan filter table gets cleared during
417 ** a soft reset and the driver needs to be able
418 ** to repopulate it.
419 */
420 static u32 em_shadow_vfta[EM_VFTA_SIZE];
421
422 /* Global used in WOL setup with multiport cards */
423 static int global_quad_port_a = 0;
424
425 /*********************************************************************
426  *  Device identification routine
427  *
428  *  em_probe determines if the driver should be loaded on
429  *  adapter based on PCI vendor/device id of the adapter.
430  *
431  *  return BUS_PROBE_DEFAULT on success, positive on failure
432  *********************************************************************/
433
434 static int
435 em_probe(device_t dev)
436 {
437         char            adapter_name[60];
438         u16             pci_vendor_id = 0;
439         u16             pci_device_id = 0;
440         u16             pci_subvendor_id = 0;
441         u16             pci_subdevice_id = 0;
442         em_vendor_info_t *ent;
443
444         INIT_DEBUGOUT("em_probe: begin");
445
446         pci_vendor_id = pci_get_vendor(dev);
447         if (pci_vendor_id != EM_VENDOR_ID)
448                 return (ENXIO);
449
450         pci_device_id = pci_get_device(dev);
451         pci_subvendor_id = pci_get_subvendor(dev);
452         pci_subdevice_id = pci_get_subdevice(dev);
453
454         ent = em_vendor_info_array;
455         while (ent->vendor_id != 0) {
456                 if ((pci_vendor_id == ent->vendor_id) &&
457                     (pci_device_id == ent->device_id) &&
458
459                     ((pci_subvendor_id == ent->subvendor_id) ||
460                     (ent->subvendor_id == PCI_ANY_ID)) &&
461
462                     ((pci_subdevice_id == ent->subdevice_id) ||
463                     (ent->subdevice_id == PCI_ANY_ID))) {
464                         ksprintf(adapter_name, "%s %s",
465                                 em_strings[ent->index],
466                                 em_driver_version);
467                         device_set_desc_copy(dev, adapter_name);
468                         return (BUS_PROBE_DEFAULT);
469                 }
470                 ent++;
471         }
472
473         return (ENXIO);
474 }
475
476 /*********************************************************************
477  *  Device initialization routine
478  *
479  *  The attach entry point is called when the driver is being loaded.
480  *  This routine identifies the type of hardware, allocates all resources
481  *  and initializes the hardware.
482  *
483  *  return 0 on success, positive on failure
484  *********************************************************************/
485
486 static int
487 em_attach(device_t dev)
488 {
489         struct adapter  *adapter;
490         int             tsize, rsize;
491         int             error = 0;
492
493         INIT_DEBUGOUT("em_attach: begin");
494
495         adapter = device_get_softc(dev);
496         adapter->dev = adapter->osdep.dev = dev;
497
498         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
499         EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev));
500         EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev));
501
502         /* SYSCTL stuff */
503         sysctl_ctx_init(&adapter->sysctl_ctx);
504         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
505                                         SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
506                                         device_get_nameunit(adapter->dev),
507                                         CTLFLAG_RD, 0, "");
508         if (adapter->sysctl_tree == NULL) {
509                 device_printf(adapter->dev, "can't add sysctl node\n");
510                 error = ENOMEM;
511                 goto err_sysctl;
512         }
513
514         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
515             SYSCTL_CHILDREN(adapter->sysctl_tree),
516             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
517             em_sysctl_debug_info, "I", "Debug Information");
518
519         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
520             SYSCTL_CHILDREN(adapter->sysctl_tree),
521             OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
522             em_sysctl_stats, "I", "Statistics");
523
524         callout_init(&adapter->timer);
525         callout_init(&adapter->tx_fifo_timer);
526
527         /* Determine hardware and mac info */
528         em_identify_hardware(adapter);
529
530         /* Setup PCI resources */
531         if (em_allocate_pci_resources(adapter)) {
532                 device_printf(dev, "Allocation of PCI resources failed\n");
533                 error = ENXIO;
534                 goto err_pci;
535         }
536
537         /*
538         ** For ICH8 and family we need to
539         ** map the flash memory, and this
540         ** must happen after the MAC is 
541         ** identified
542         */
543         if ((adapter->hw.mac.type == e1000_ich8lan) ||
544             (adapter->hw.mac.type == e1000_pchlan) ||
545             (adapter->hw.mac.type == e1000_ich9lan) ||
546             (adapter->hw.mac.type == e1000_ich10lan)) {
547                 int rid = EM_BAR_TYPE_FLASH;
548                 adapter->flash = bus_alloc_resource_any(dev,
549                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
550                 if (adapter->flash == NULL) {
551                         device_printf(dev, "Mapping of Flash failed\n");
552                         error = ENXIO;
553                         goto err_pci;
554                 }
555                 /* This is used in the shared code */
556                 adapter->hw.flash_address = (u8 *)adapter->flash;
557                 adapter->osdep.flash_bus_space_tag =
558                     rman_get_bustag(adapter->flash);
559                 adapter->osdep.flash_bus_space_handle =
560                     rman_get_bushandle(adapter->flash);
561         }
562
563         /* Do Shared Code initialization */
564         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
565                 device_printf(dev, "Setup of Shared code failed\n");
566                 error = ENXIO;
567                 goto err_pci;
568         }
569
570         e1000_get_bus_info(&adapter->hw);
571
572         /* Set up some sysctls for the tunable interrupt delays */
573         em_add_int_delay_sysctl(adapter, "rx_int_delay",
574             "receive interrupt delay in usecs", &adapter->rx_int_delay,
575             E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
576         em_add_int_delay_sysctl(adapter, "tx_int_delay",
577             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
578             E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
579         if (adapter->hw.mac.type >= e1000_82540) {
580                 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
581                     "receive interrupt delay limit in usecs",
582                     &adapter->rx_abs_int_delay,
583                     E1000_REGISTER(&adapter->hw, E1000_RADV),
584                     em_rx_abs_int_delay_dflt);
585                 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
586                     "transmit interrupt delay limit in usecs",
587                     &adapter->tx_abs_int_delay,
588                     E1000_REGISTER(&adapter->hw, E1000_TADV),
589                     em_tx_abs_int_delay_dflt);
590         }
591
592 #ifndef EM_LEGACY_IRQ
593         /* Sysctls for limiting the amount of work done in the taskqueue */
594         em_add_rx_process_limit(adapter, "rx_processing_limit",
595             "max number of rx packets to process", &adapter->rx_process_limit,
596             em_rx_process_limit);
597 #endif
598
599         /*
600          * Validate number of transmit and receive descriptors. It
601          * must not exceed hardware maximum, and must be multiple
602          * of E1000_DBA_ALIGN.
603          */
604         if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
605             (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
606             (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
607             (em_txd < EM_MIN_TXD)) {
608                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
609                     EM_DEFAULT_TXD, em_txd);
610                 adapter->num_tx_desc = EM_DEFAULT_TXD;
611         } else
612                 adapter->num_tx_desc = em_txd;
613         if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
614             (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
615             (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
616             (em_rxd < EM_MIN_RXD)) {
617                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
618                     EM_DEFAULT_RXD, em_rxd);
619                 adapter->num_rx_desc = EM_DEFAULT_RXD;
620         } else
621                 adapter->num_rx_desc = em_rxd;
622
623         adapter->hw.mac.autoneg = DO_AUTO_NEG;
624         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
625         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
626         adapter->rx_buffer_len = 2048;
627
628         e1000_init_script_state_82541(&adapter->hw, TRUE);
629         e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);
630
631         /* Copper options */
632         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
633                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
634                 adapter->hw.phy.disable_polarity_correction = FALSE;
635                 adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
636         }
637
638         /*
639          * Set the frame limits assuming
640          * standard ethernet sized frames.
641          */
642         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
643         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
644
645         /*
646          * This controls when hardware reports transmit completion
647          * status.
648          */
649         adapter->hw.mac.report_tx_early = 1;
650
651         tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
652             EM_DBA_ALIGN);
653
654         /* Allocate Transmit Descriptor ring */
655         if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
656                 device_printf(dev, "Unable to allocate tx_desc memory\n");
657                 error = ENOMEM;
658                 goto err_tx_desc;
659         }
660         adapter->tx_desc_base = 
661             (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
662
663         rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
664             EM_DBA_ALIGN);
665
666         /* Allocate Receive Descriptor ring */
667         if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
668                 device_printf(dev, "Unable to allocate rx_desc memory\n");
669                 error = ENOMEM;
670                 goto err_rx_desc;
671         }
672         adapter->rx_desc_base =
673             (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;
674
675         /*
676         ** Start from a known state, this is
677         ** important in reading the nvm and
678         ** mac from that.
679         */
680         e1000_reset_hw(&adapter->hw);
681
682         /* Make sure we have a good EEPROM before we read from it */
683         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
684                 /*
685                 ** Some PCI-E parts fail the first check due to
686                 ** the link being in sleep state, call it again,
687                 ** if it fails a second time its a real issue.
688                 */
689                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
690                         device_printf(dev,
691                             "The EEPROM Checksum Is Not Valid\n");
692                         error = EIO;
693                         goto err_hw_init;
694                 }
695         }
696
697         /* Copy the permanent MAC address out of the EEPROM */
698         if (e1000_read_mac_addr(&adapter->hw) < 0) {
699                 device_printf(dev, "EEPROM read error while reading MAC"
700                     " address\n");
701                 error = EIO;
702                 goto err_hw_init;
703         }
704
705         if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
706                 device_printf(dev, "Invalid MAC address\n");
707                 error = EIO;
708                 goto err_hw_init;
709         }
710
711         /* Initialize the hardware */
712         if (em_hardware_init(adapter)) {
713                 device_printf(dev, "Unable to initialize the hardware\n");
714                 error = EIO;
715                 goto err_hw_init;
716         }
717
718         /* Allocate transmit descriptors and buffers */
719         if (em_allocate_transmit_structures(adapter)) {
720                 device_printf(dev, "Could not setup transmit structures\n");
721                 error = ENOMEM;
722                 goto err_tx_struct;
723         }
724
725         /* Allocate receive descriptors and buffers */
726         if (em_allocate_receive_structures(adapter)) {
727                 device_printf(dev, "Could not setup receive structures\n");
728                 error = ENOMEM;
729                 goto err_rx_struct;
730         }
731
732         /*
733         **  Do interrupt configuration
734         */
735         if (adapter->msi > 1) /* Do MSI/X */
736                 error = em_allocate_msix(adapter);
737         else  /* MSI or Legacy */
738                 error = em_allocate_legacy(adapter);
739         if (error)
740                 goto err_rx_struct;
741
742         /*
743          * Get Wake-on-Lan and Management info for later use
744          */
745         em_get_wakeup(dev);
746
747         /* Setup OS specific network interface */
748         em_setup_interface(dev, adapter);
749
750         /* Initialize statistics */
751         em_update_stats_counters(adapter);
752
753         adapter->hw.mac.get_link_status = 1;
754         em_update_link_status(adapter);
755
756         /* Indicate SOL/IDER usage */
757         if (e1000_check_reset_block(&adapter->hw))
758                 device_printf(dev,
759                     "PHY reset is blocked due to SOL/IDER session.\n");
760
761         /* Do we need workaround for 82544 PCI-X adapter? */
762         if (adapter->hw.bus.type == e1000_bus_type_pcix &&
763             adapter->hw.mac.type == e1000_82544)
764                 adapter->pcix_82544 = TRUE;
765         else
766                 adapter->pcix_82544 = FALSE;
767
768         /* Register for VLAN events */
769         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
770             em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
771         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
772             em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
773
774         /* Non-AMT based hardware can now take control from firmware */
775         if (adapter->has_manage && !adapter->has_amt)
776                 em_get_hw_control(adapter);
777
778         /* Tell the stack that the interface is not active */
779         adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
780
781         INIT_DEBUGOUT("em_attach: end");
782
783         return (0);
784
785 err_rx_struct:
786         em_free_transmit_structures(adapter);
787 err_tx_struct:
788 err_hw_init:
789         em_release_hw_control(adapter);
790         em_dma_free(adapter, &adapter->rxdma);
791 err_rx_desc:
792         em_dma_free(adapter, &adapter->txdma);
793 err_tx_desc:
794 err_pci:
795         em_free_pci_resources(adapter);
796 err_sysctl:
797         sysctl_ctx_free(&adapter->sysctl_ctx);
798         EM_TX_LOCK_DESTROY(adapter);
799         EM_RX_LOCK_DESTROY(adapter);
800         EM_CORE_LOCK_DESTROY(adapter);
801
802         return (error);
803 }
804
805 /*********************************************************************
806  *  Device removal routine
807  *
808  *  The detach entry point is called when the driver is being removed.
809  *  This routine stops the adapter and deallocates all the resources
810  *  that were allocated for driver operation.
811  *
812  *  return 0 on success, positive on failure
813  *********************************************************************/
814
815 static int
816 em_detach(device_t dev)
817 {
818         struct adapter  *adapter = device_get_softc(dev);
819
820         INIT_DEBUGOUT("em_detach: begin");
821
822         /* Make sure VLANS are not using driver */
823         if (adapter->ifp->if_vlantrunks != NULL) {
824                 device_printf(dev,"Vlan in use, detach first\n");
825                 return (EBUSY);
826         }
827
828 #ifdef DEVICE_POLLING
829         if (ifp->if_capenable & IFCAP_POLLING)
830                 ether_poll_deregister(ifp);
831 #endif
832
833         EM_CORE_LOCK(adapter);
834         EM_TX_LOCK(adapter);
835         adapter->in_detach = 1;
836         em_stop(adapter);
837         e1000_phy_hw_reset(&adapter->hw);
838
839         em_release_manageability(adapter);
840
841         EM_TX_UNLOCK(adapter);
842         EM_CORE_UNLOCK(adapter);
843
844         /* Unregister VLAN events */
845         if (adapter->vlan_attach != NULL)
846                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
847         if (adapter->vlan_detach != NULL)
848                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
849
850         ether_ifdetach(adapter->ifp);
851         callout_stop(&adapter->timer);
852         callout_stop(&adapter->tx_fifo_timer);
853
854         em_free_pci_resources(adapter);
855         bus_generic_detach(dev);
856
857         em_free_transmit_structures(adapter);
858         em_free_receive_structures(adapter);
859
860         /* Free Transmit Descriptor ring */
861         if (adapter->tx_desc_base) {
862                 em_dma_free(adapter, &adapter->txdma);
863                 adapter->tx_desc_base = NULL;
864         }
865
866         /* Free Receive Descriptor ring */
867         if (adapter->rx_desc_base) {
868                 em_dma_free(adapter, &adapter->rxdma);
869                 adapter->rx_desc_base = NULL;
870         }
871
872         em_release_hw_control(adapter);
873         sysctl_ctx_free(&adapter->sysctl_ctx);
874         EM_TX_LOCK_DESTROY(adapter);
875         EM_RX_LOCK_DESTROY(adapter);
876         EM_CORE_LOCK_DESTROY(adapter);
877
878         return (0);
879 }
880
881 /*********************************************************************
882  *
883  *  Shutdown entry point
884  *
885  **********************************************************************/
886
887 static int
888 em_shutdown(device_t dev)
889 {
890         return em_suspend(dev);
891 }
892
893 /*
894  * Suspend/resume device methods.
895  */
896 static int
897 em_suspend(device_t dev)
898 {
899         struct adapter *adapter = device_get_softc(dev);
900
901         EM_CORE_LOCK(adapter);
902
903         em_release_manageability(adapter);
904         em_release_hw_control(adapter);
905         em_enable_wakeup(dev);
906
907         EM_CORE_UNLOCK(adapter);
908
909         return bus_generic_suspend(dev);
910 }
911
912 static int
913 em_resume(device_t dev)
914 {
915         struct adapter *adapter = device_get_softc(dev);
916         struct ifnet *ifp = adapter->ifp;
917
918         EM_CORE_LOCK(adapter);
919         em_init_locked(adapter);
920         em_init_manageability(adapter);
921         EM_CORE_UNLOCK(adapter);
922         em_start(ifp);
923
924         return bus_generic_resume(dev);
925 }
926
927
928 /*********************************************************************
929  *  Transmit entry point
930  *
931  *  em_start is called by the stack to initiate a transmit.
932  *  The driver will remain in this routine as long as there are
933  *  packets to transmit and transmit resources are available.
934  *  In case resources are not available stack is notified and
935  *  the packet is requeued.
936  **********************************************************************/
937
938 #if 0
939 static int
940 em_mq_start_locked(struct ifnet *ifp, struct mbuf *m)
941 {
942         struct adapter  *adapter = ifp->if_softc;
943         struct mbuf     *next;
944         int error = E1000_SUCCESS;
945
946         EM_TX_LOCK_ASSERT(adapter);
947         /* To allow being called from a tasklet */
948         if (m == NULL)
949                 goto process;
950
951         if (((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
952             IFF_RUNNING)
953             || (!adapter->link_active)) {
954                 error = drbr_enqueue(ifp, adapter->br, m);
955                 return (error);
956         } else if (!drbr_needs_enqueue(ifp, adapter->br) &&
957             (adapter->num_tx_desc_avail > EM_TX_OP_THRESHOLD)) {
958                 if ((error = em_xmit(adapter, &m)) != 0) {
959                         if (m)
960                                 error = drbr_enqueue(ifp, adapter->br, m);
961                         return (error);
962                 } else {
963                         /*
964                          * We've bypassed the buf ring so we need to update
965                          * ifp directly
966                          */
967                         drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
968                         /*
969                         ** Send a copy of the frame to the BPF
970                         ** listener and set the watchdog on.
971                         */
972                         ETHER_BPF_MTAP(ifp, m);
973                         adapter->watchdog_check = TRUE;
974                 }
975         } else if ((error = drbr_enqueue(ifp, adapter->br, m)) != 0)
976                 return (error);
977         
978 process:
979         if (drbr_empty(ifp, adapter->br))
980                 return(error);
981         /* Process the queue */
982         while (TRUE) {
983                 if ((ifp->if_flags & IFF_RUNNING) == 0)
984                         break;
985                 next = drbr_dequeue(ifp, adapter->br);
986                 if (next == NULL)
987                         break;
988                 if ((error = em_xmit(adapter, &next)) != 0) {
989                         if (next != NULL)
990                                 error = drbr_enqueue(ifp, adapter->br, next);
991                         break;
992                 }
993                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
994                 ETHER_BPF_MTAP(ifp, next);
995                 /* Set the watchdog */
996                 adapter->watchdog_check = TRUE;
997         }
998
999         if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
1000                 ifp->if_flags |= IFF_OACTIVE;
1001
1002         return (error);
1003 }
1004
1005 /*
1006 ** Multiqueue capable stack interface, this is not
1007 ** yet truely multiqueue, but that is coming...
1008 */
1009 static int
1010 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1011 {
1012         
1013         struct adapter *adapter = ifp->if_softc;
1014         int error = 0;
1015
1016         if (EM_TX_TRYLOCK(adapter)) {
1017                 if (ifp->if_flags & IFF_RUNNING)
1018                         error = em_mq_start_locked(ifp, m);
1019                 EM_TX_UNLOCK(adapter);
1020         } else 
1021                 error = drbr_enqueue(ifp, adapter->br, m);
1022
1023         return (error);
1024 }
1025
1026 static void
1027 em_qflush(struct ifnet *ifp)
1028 {
1029         struct mbuf *m;
1030         struct adapter *adapter = (struct adapter *)ifp->if_softc;
1031
1032         EM_TX_LOCK(adapter);
1033         while ((m = buf_ring_dequeue_sc(adapter->br)) != NULL)
1034                 m_freem(m);
1035         if_qflush(ifp);
1036         EM_TX_UNLOCK(adapter);
1037 }
1038 #endif
1039
1040 static void
1041 em_start_locked(struct ifnet *ifp)
1042 {
1043         struct adapter  *adapter = ifp->if_softc;
1044         struct mbuf     *m_head;
1045
1046         EM_TX_LOCK_ASSERT(adapter);
1047
1048         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
1049             IFF_RUNNING)
1050                 return;
1051         if (!adapter->link_active)
1052                 return;
1053
1054         while (!ifq_is_empty(&ifp->if_snd)) {
1055
1056                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
1057                 if (m_head == NULL)
1058                         break;
1059                 /*
1060                  *  Encapsulation can modify our pointer, and or make it
1061                  *  NULL on failure.  In that event, we can't requeue.
1062                  */
1063                 if (em_xmit(adapter, &m_head)) {
1064                         if (m_head == NULL)
1065                                 break;
1066                         ifp->if_flags |= IFF_OACTIVE;
1067                         ifq_prepend(&ifp->if_snd, m_head);
1068                         break;
1069                 }
1070
1071                 /* Send a copy of the frame to the BPF listener */
1072                 ETHER_BPF_MTAP(ifp, m_head);
1073
1074                 /* Set timeout in case hardware has problems transmitting. */
1075                 adapter->watchdog_check = TRUE;
1076         }
1077         if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
1078                 ifp->if_flags |= IFF_OACTIVE;
1079
1080         return;
1081 }
1082
1083 static void
1084 em_start(struct ifnet *ifp)
1085 {
1086         struct adapter *adapter = ifp->if_softc;
1087
1088         EM_TX_LOCK(adapter);
1089         if (ifp->if_flags & IFF_RUNNING)
1090                 em_start_locked(ifp);
1091         EM_TX_UNLOCK(adapter);
1092 }
1093
1094 /*********************************************************************
1095  *  Ioctl entry point
1096  *
1097  *  em_ioctl is called when the user wants to configure the
1098  *  interface.
1099  *
1100  *  return 0 on success, positive on failure
1101  **********************************************************************/
1102
1103 static int
1104 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred * uc)
1105 {
1106         struct adapter  *adapter = ifp->if_softc;
1107         struct ifreq *ifr = (struct ifreq *)data;
1108 #ifdef INET
1109         struct ifaddr *ifa = (struct ifaddr *)data;
1110 #endif
1111         int error = 0;
1112
1113         if (adapter->in_detach)
1114                 return (error);
1115
1116         switch (command) {
1117         case SIOCSIFADDR:
1118 #ifdef INET
1119                 if (ifa->ifa_addr->sa_family == AF_INET) {
1120                         /*
1121                          * XXX
1122                          * Since resetting hardware takes a very long time
1123                          * and results in link renegotiation we only
1124                          * initialize the hardware only when it is absolutely
1125                          * required.
1126                          */
1127                         ifp->if_flags |= IFF_UP;
1128                         if (!(ifp->if_flags & IFF_RUNNING)) {
1129                                 EM_CORE_LOCK(adapter);
1130                                 em_init_locked(adapter);
1131                                 EM_CORE_UNLOCK(adapter);
1132                         }
1133                         arp_ifinit(ifp, ifa);
1134                 } else
1135 #endif
1136                         error = ether_ioctl(ifp, command, data);
1137                 break;
1138         case SIOCSIFMTU:
1139             {
1140                 int max_frame_size;
1141                 u16 eeprom_data = 0;
1142
1143                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1144
1145                 EM_CORE_LOCK(adapter);
1146                 switch (adapter->hw.mac.type) {
1147                 case e1000_82573:
1148                         /*
1149                          * 82573 only supports jumbo frames
1150                          * if ASPM is disabled.
1151                          */
1152                         e1000_read_nvm(&adapter->hw,
1153                             NVM_INIT_3GIO_3, 1, &eeprom_data);
1154                         if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
1155                                 max_frame_size = ETHER_MAX_LEN;
1156                                 break;
1157                         }
1158                         /* Allow Jumbo frames - fall thru */
1159                 case e1000_82571:
1160                 case e1000_82572:
1161                 case e1000_ich9lan:
1162                 case e1000_ich10lan:
1163                 case e1000_82574:
1164                 case e1000_80003es2lan: /* Limit Jumbo Frame size */
1165                         max_frame_size = 9234;
1166                         break;
1167                 case e1000_pchlan:
1168                         max_frame_size = 4096;
1169                         break;
1170                         /* Adapters that do not support jumbo frames */
1171                 case e1000_82542:
1172                 case e1000_82583:
1173                 case e1000_ich8lan:
1174                         max_frame_size = ETHER_MAX_LEN;
1175                         break;
1176                 default:
1177                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1178                 }
1179                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1180                     ETHER_CRC_LEN) {
1181                         EM_CORE_UNLOCK(adapter);
1182                         error = EINVAL;
1183                         break;
1184                 }
1185
1186                 ifp->if_mtu = ifr->ifr_mtu;
1187                 adapter->max_frame_size =
1188                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1189                 em_init_locked(adapter);
1190                 EM_CORE_UNLOCK(adapter);
1191                 break;
1192             }
1193         case SIOCSIFFLAGS:
1194                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1195                     SIOCSIFFLAGS (Set Interface Flags)");
1196                 EM_CORE_LOCK(adapter);
1197                 if (ifp->if_flags & IFF_UP) {
1198                         if ((ifp->if_flags & IFF_RUNNING)) {
1199                                 if ((ifp->if_flags ^ adapter->if_flags) &
1200                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1201                                         em_disable_promisc(adapter);
1202                                         em_set_promisc(adapter);
1203                                 }
1204                         } else
1205                                 em_init_locked(adapter);
1206                 } else
1207                         if (ifp->if_flags & IFF_RUNNING) {
1208                                 EM_TX_LOCK(adapter);
1209                                 em_stop(adapter);
1210                                 EM_TX_UNLOCK(adapter);
1211                         }
1212                 adapter->if_flags = ifp->if_flags;
1213                 EM_CORE_UNLOCK(adapter);
1214                 break;
1215         case SIOCADDMULTI:
1216         case SIOCDELMULTI:
1217                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1218                 if (ifp->if_flags & IFF_RUNNING) {
1219                         EM_CORE_LOCK(adapter);
1220                         em_disable_intr(adapter);
1221                         em_set_multi(adapter);
1222                         if (adapter->hw.mac.type == e1000_82542 && 
1223                             adapter->hw.revision_id == E1000_REVISION_2) {
1224                                 em_initialize_receive_unit(adapter);
1225                         }
1226 #ifdef DEVICE_POLLING
1227                         if (!(ifp->if_capenable & IFCAP_POLLING))
1228 #endif
1229                                 em_enable_intr(adapter);
1230                         EM_CORE_UNLOCK(adapter);
1231                 }
1232                 break;
1233         case SIOCSIFMEDIA:
1234                 /* Check SOL/IDER usage */
1235                 EM_CORE_LOCK(adapter);
1236                 if (e1000_check_reset_block(&adapter->hw)) {
1237                         EM_CORE_UNLOCK(adapter);
1238                         device_printf(adapter->dev, "Media change is"
1239                             " blocked due to SOL/IDER session.\n");
1240                         break;
1241                 }
1242                 EM_CORE_UNLOCK(adapter);
1243         case SIOCGIFMEDIA:
1244                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1245                     SIOCxIFMEDIA (Get/Set Interface Media)");
1246                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1247                 break;
1248         case SIOCSIFCAP:
1249             {
1250                 int mask, reinit;
1251
1252                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1253                 reinit = 0;
1254                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1255 #ifdef DEVICE_POLLING
1256                 if (mask & IFCAP_POLLING) {
1257                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1258                                 error = ether_poll_register(em_poll, ifp);
1259                                 if (error)
1260                                         return (error);
1261                                 EM_CORE_LOCK(adapter);
1262                                 em_disable_intr(adapter);
1263                                 ifp->if_capenable |= IFCAP_POLLING;
1264                                 EM_CORE_UNLOCK(adapter);
1265                         } else {
1266                                 error = ether_poll_deregister(ifp);
1267                                 /* Enable interrupt even in error case */
1268                                 EM_CORE_LOCK(adapter);
1269                                 em_enable_intr(adapter);
1270                                 ifp->if_capenable &= ~IFCAP_POLLING;
1271                                 EM_CORE_UNLOCK(adapter);
1272                         }
1273                 }
1274 #endif
1275                 if (mask & IFCAP_HWCSUM) {
1276                         ifp->if_capenable ^= IFCAP_HWCSUM;
1277                         reinit = 1;
1278                 }
1279 #ifdef NET_TSO
1280                 if (mask & IFCAP_TSO4) {
1281                         ifp->if_capenable ^= IFCAP_TSO4;
1282                         reinit = 1;
1283                 }
1284 #endif
1285                 if (mask & IFCAP_VLAN_HWTAGGING) {
1286                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1287                         reinit = 1;
1288                 }
1289
1290                 if (mask & IFCAP_VLAN_HWFILTER) {
1291                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1292                         reinit = 1;
1293                 }
1294
1295                 if ((mask & IFCAP_WOL) &&
1296                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1297                         if (mask & IFCAP_WOL_MCAST)
1298                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1299                         if (mask & IFCAP_WOL_MAGIC)
1300                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1301                 }
1302
1303                 if (reinit && (ifp->if_flags & IFF_RUNNING))
1304                         em_init(adapter);
1305 #if 0
1306                 VLAN_CAPABILITIES(ifp);
1307 #endif
1308                 break;
1309             }
1310
1311         default:
1312                 error = ether_ioctl(ifp, command, data);
1313                 break;
1314         }
1315
1316         return (error);
1317 }
1318
1319
1320 /*********************************************************************
1321  *  Init entry point
1322  *
1323  *  This routine is used in two ways. It is used by the stack as
1324  *  init entry point in network interface structure. It is also used
1325  *  by the driver as a hw/sw initialization routine to get to a
1326  *  consistent state.
1327  *
1328  *  return 0 on success, positive on failure
1329  **********************************************************************/
1330
1331 static void
1332 em_init_locked(struct adapter *adapter)
1333 {
1334         struct ifnet    *ifp = adapter->ifp;
1335         device_t        dev = adapter->dev;
1336         u32             pba;
1337
1338         INIT_DEBUGOUT("em_init: begin");
1339
1340         EM_CORE_LOCK_ASSERT(adapter);
1341
1342         EM_TX_LOCK(adapter);
1343         em_stop(adapter);
1344         EM_TX_UNLOCK(adapter);
1345
1346         /*
1347          * Packet Buffer Allocation (PBA)
1348          * Writing PBA sets the receive portion of the buffer
1349          * the remainder is used for the transmit buffer.
1350          *
1351          * Devices before the 82547 had a Packet Buffer of 64K.
1352          *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
1353          * After the 82547 the buffer was reduced to 40K.
1354          *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
1355          *   Note: default does not leave enough room for Jumbo Frame >10k.
1356          */
1357         switch (adapter->hw.mac.type) {
1358         case e1000_82547:
1359         case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
1360                 if (adapter->max_frame_size > 8192)
1361                         pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
1362                 else
1363                         pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
1364                 adapter->tx_fifo_head = 0;
1365                 adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
1366                 adapter->tx_fifo_size =
1367                     (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
1368                 break;
1369         /* Total Packet Buffer on these is 48K */
1370         case e1000_82571:
1371         case e1000_82572:
1372         case e1000_80003es2lan:
1373                         pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1374                 break;
1375         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1376                         pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1377                 break;
1378         case e1000_82574:
1379         case e1000_82583:
1380                         pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1381                 break;
1382         case e1000_ich9lan:
1383         case e1000_ich10lan:
1384         case e1000_pchlan:
1385                 pba = E1000_PBA_10K;
1386                 break;
1387         case e1000_ich8lan:
1388                 pba = E1000_PBA_8K;
1389                 break;
1390         default:
1391                 /* Devices before 82547 had a Packet Buffer of 64K.   */
1392                 if (adapter->max_frame_size > 8192)
1393                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1394                 else
1395                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1396         }
1397
1398         INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1399         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1400         
1401         /* Get the latest mac address, User can use a LAA */
1402         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1403               ETHER_ADDR_LEN);
1404
1405         /* Put the address into the Receive Address Array */
1406         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1407
1408         /*
1409          * With the 82571 adapter, RAR[0] may be overwritten
1410          * when the other port is reset, we make a duplicate
1411          * in RAR[14] for that eventuality, this assures
1412          * the interface continues to function.
1413          */
1414         if (adapter->hw.mac.type == e1000_82571) {
1415                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1416                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1417                     E1000_RAR_ENTRIES - 1);
1418         }
1419
1420         /* Initialize the hardware */
1421         if (em_hardware_init(adapter)) {
1422                 device_printf(dev, "Unable to initialize the hardware\n");
1423                 return;
1424         }
1425         em_update_link_status(adapter);
1426
1427         /* Setup VLAN support, basic and offload if available */
1428         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1429         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1430                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1431                         /* Use real VLAN Filter support */
1432                         em_setup_vlan_hw_support(adapter);
1433                 else {
1434                         u32 ctrl;
1435                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1436                         ctrl |= E1000_CTRL_VME;
1437                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1438                 }
1439         }
1440
1441         /* Set hardware offload abilities */
1442         ifp->if_hwassist = 0;
1443         if (adapter->hw.mac.type >= e1000_82543) {
1444                 if (ifp->if_capenable & IFCAP_TXCSUM)
1445                         ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1446 #ifdef NET_TSO
1447                 if (ifp->if_capenable & IFCAP_TSO4)
1448                         ifp->if_hwassist |= CSUM_TSO;
1449 #endif
1450         }
1451
1452         /* Configure for OS presence */
1453         em_init_manageability(adapter);
1454
1455         /* Prepare transmit descriptors and buffers */
1456         em_setup_transmit_structures(adapter);
1457         em_initialize_transmit_unit(adapter);
1458
1459         /* Setup Multicast table */
1460         em_set_multi(adapter);
1461
1462         /* Prepare receive descriptors and buffers */
1463         if (em_setup_receive_structures(adapter)) {
1464                 device_printf(dev, "Could not setup receive structures\n");
1465                 EM_TX_LOCK(adapter);
1466                 em_stop(adapter);
1467                 EM_TX_UNLOCK(adapter);
1468                 return;
1469         }
1470         em_initialize_receive_unit(adapter);
1471
1472         /* Don't lose promiscuous settings */
1473         em_set_promisc(adapter);
1474
1475         ifp->if_flags |= IFF_RUNNING;
1476         ifp->if_flags &= ~IFF_OACTIVE;
1477
1478         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1479         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1480
1481         /* MSI/X configuration for 82574 */
1482         if (adapter->hw.mac.type == e1000_82574) {
1483                 int tmp;
1484                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1485                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1486                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1487                 /*
1488                 ** Set the IVAR - interrupt vector routing.
1489                 ** Each nibble represents a vector, high bit
1490                 ** is enable, other 3 bits are the MSIX table
1491                 ** entry, we map RXQ0 to 0, TXQ0 to 1, and
1492                 ** Link (other) to 2, hence the magic number.
1493                 */
1494                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, 0x800A0908);
1495         }
1496
1497 #ifdef DEVICE_POLLING
1498         /*
1499          * Only enable interrupts if we are not polling, make sure
1500          * they are off otherwise.
1501          */
1502         if (ifp->if_capenable & IFCAP_POLLING)
1503                 em_disable_intr(adapter);
1504         else
1505 #endif /* DEVICE_POLLING */
1506                 em_enable_intr(adapter);
1507
1508         /* AMT based hardware can now take control from firmware */
1509         if (adapter->has_manage && adapter->has_amt)
1510                 em_get_hw_control(adapter);
1511
1512         /* Don't reset the phy next time init gets called */
1513         adapter->hw.phy.reset_disable = TRUE;
1514 }
1515
/*
 * Locked wrapper around em_init_locked(): takes the core lock,
 * runs the initialization and drops the lock again.
 *
 * arg is the driver softc (struct adapter *).
 */
static void
em_init(void *arg)
{
	struct adapter *sc = arg;

	EM_CORE_LOCK(sc);
	em_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1525
1526
1527 #ifdef DEVICE_POLLING
1528 /*********************************************************************
1529  *
1530  *  Legacy polling routine  
1531  *
1532  *********************************************************************/
1533 static int
1534 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1535 {
1536         struct adapter *adapter = ifp->if_softc;
1537         u32             reg_icr, rx_done = 0;
1538
1539         EM_CORE_LOCK(adapter);
1540         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1541                 EM_CORE_UNLOCK(adapter);
1542                 return (rx_done);
1543         }
1544
1545         if (cmd == POLL_AND_CHECK_STATUS) {
1546                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1547                 /* Link status change */
1548                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1549                         adapter->hw.mac.get_link_status = 1;
1550                         em_update_link_status(adapter);
1551                 }
1552                 if (reg_icr & E1000_ICR_RXO)
1553                         adapter->rx_overruns++;
1554         }
1555         EM_CORE_UNLOCK(adapter);
1556
1557         rx_done = em_rxeof(adapter, count);
1558
1559         EM_TX_LOCK(adapter);
1560         em_txeof(adapter);
1561 #if 0
1562         if (!drbr_empty(ifp, adapter->br))
1563                 em_mq_start_locked(ifp, NULL);
1564 #else
1565         if (!ifq_is_empty(&ifp->if_snd))
1566                 em_start_locked(ifp);
1567 #endif
1568         EM_TX_UNLOCK(adapter);
1569         return (rx_done);
1570 }
1571 #endif /* DEVICE_POLLING */
1572
1573 #ifdef EM_LEGACY_IRQ 
1574 /*********************************************************************
1575  *
1576  *  Legacy Interrupt Service routine  
1577  *
1578  *********************************************************************/
1579
1580 static void
1581 em_intr(void *arg)
1582 {
1583         struct adapter  *adapter = arg;
1584         struct ifnet    *ifp = adapter->ifp;
1585         u32             reg_icr;
1586
1587
1588         if (ifp->if_capenable & IFCAP_POLLING)
1589                 return;
1590
1591         EM_CORE_LOCK(adapter);
1592         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1593         if (reg_icr & E1000_ICR_RXO)
1594                 adapter->rx_overruns++;
1595         if ((reg_icr == 0xffffffff) || (reg_icr == 0)||
1596             (adapter->hw.mac.type >= e1000_82571 &&
1597             (reg_icr & E1000_ICR_INT_ASSERTED) == 0))
1598                         goto out;
1599
1600         if ((ifp->if_flags & IFF_RUNNING) == 0)
1601                         goto out;
1602
1603         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1604                 callout_stop(&adapter->timer);
1605                 adapter->hw.mac.get_link_status = 1;
1606                 em_update_link_status(adapter);
1607                 /* Deal with TX cruft when link lost */
1608                 em_tx_purge(adapter);
1609                 callout_reset(&adapter->timer, hz,
1610                     em_local_timer, adapter);
1611                 goto out;
1612         }
1613
1614         EM_TX_LOCK(adapter);
1615         em_txeof(adapter);
1616         em_rxeof(adapter, -1);
1617         em_txeof(adapter);
1618         if (ifp->if_flags & IFF_RUNNING &&
1619             !ifq_is_empty(&ifp->if_snd))
1620                 em_start_locked(ifp);
1621         EM_TX_UNLOCK(adapter);
1622
1623 out:
1624         EM_CORE_UNLOCK(adapter);
1625         return;
1626 }
1627
1628 #else /* EM_FAST_IRQ, then fast interrupt routines only */
1629
1630 static void
1631 em_handle_link(void *context, int pending)
1632 {
1633         struct adapter  *adapter = context;
1634         struct ifnet *ifp = adapter->ifp;
1635
1636         if (!(ifp->if_flags & IFF_RUNNING))
1637                 return;
1638
1639         EM_CORE_LOCK(adapter);
1640         callout_stop(&adapter->timer);
1641         em_update_link_status(adapter);
1642         /* Deal with TX cruft when link lost */
1643         em_tx_purge(adapter);
1644         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1645         EM_CORE_UNLOCK(adapter);
1646 }
1647
1648
1649 /* Combined RX/TX handler, used by Legacy and MSI */
1650 static void
1651 em_handle_rxtx(void *context, int pending)
1652 {
1653         struct adapter  *adapter = context;
1654         struct ifnet    *ifp = adapter->ifp;
1655
1656
1657         if (ifp->if_flags & IFF_RUNNING) {
1658                 if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
1659                         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1660                 EM_TX_LOCK(adapter);
1661                 em_txeof(adapter);
1662 #if 0
1663                 if (!drbr_empty(ifp, adapter->br))
1664                         em_mq_start_locked(ifp, NULL);
1665 #else
1666                 if (!ifq_is_empty(&ifp->if_snd))
1667                         em_start_locked(ifp);
1668 #endif
1669                 EM_TX_UNLOCK(adapter);
1670         }
1671
1672         em_enable_intr(adapter);
1673 }
1674
1675 /*********************************************************************
1676  *
1677  *  Fast Legacy/MSI Combined Interrupt Service routine  
1678  *
1679  *********************************************************************/
1680 #define FILTER_STRAY
1681 #define FILTER_HANDLED
1682 static void
1683 em_irq_fast(void *arg)
1684 {
1685         struct adapter  *adapter = arg;
1686         struct ifnet    *ifp;
1687         u32             reg_icr;
1688
1689         ifp = adapter->ifp;
1690
1691         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1692
1693         /* Hot eject?  */
1694         if (reg_icr == 0xffffffff)
1695                 return FILTER_STRAY;
1696
1697         /* Definitely not our interrupt.  */
1698         if (reg_icr == 0x0)
1699                 return FILTER_STRAY;
1700
1701         /*
1702          * Starting with the 82571 chip, bit 31 should be used to
1703          * determine whether the interrupt belongs to us.
1704          */
1705         if (adapter->hw.mac.type >= e1000_82571 &&
1706             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1707                 return FILTER_STRAY;
1708
1709         /*
1710          * Mask interrupts until the taskqueue is finished running.  This is
1711          * cheap, just assume that it is needed.  This also works around the
1712          * MSI message reordering errata on certain systems.
1713          */
1714         em_disable_intr(adapter);
1715         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1716
1717         /* Link status change */
1718         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1719                 adapter->hw.mac.get_link_status = 1;
1720                 taskqueue_enqueue(taskqueue_swi, &adapter->link_task);
1721         }
1722
1723         if (reg_icr & E1000_ICR_RXO)
1724                 adapter->rx_overruns++;
1725         return FILTER_HANDLED;
1726 }
1727
1728 /*********************************************************************
1729  *
1730  *  MSIX Interrupt Service Routines
1731  *
1732  **********************************************************************/
1733 #define EM_MSIX_TX      0x00040000
1734 #define EM_MSIX_RX      0x00010000
1735 #define EM_MSIX_LINK    0x00100000
1736
1737 static void
1738 em_msix_tx(void *arg)
1739 {
1740         struct adapter *adapter = arg;
1741         struct ifnet    *ifp = adapter->ifp;
1742
1743         ++adapter->tx_irq;
1744         if (ifp->if_flags & IFF_RUNNING) {
1745                 EM_TX_LOCK(adapter);
1746                 em_txeof(adapter);
1747                 EM_TX_UNLOCK(adapter);
1748                 taskqueue_enqueue(adapter->tq, &adapter->tx_task);
1749         }
1750         /* Reenable this interrupt */
1751         E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_TX);
1752         return;
1753 }
1754
1755 /*********************************************************************
1756  *
1757  *  MSIX RX Interrupt Service routine
1758  *
1759  **********************************************************************/
1760
1761 static void
1762 em_msix_rx(void *arg)
1763 {
1764         struct adapter *adapter = arg;
1765         struct ifnet    *ifp = adapter->ifp;
1766
1767         ++adapter->rx_irq;
1768         if ((ifp->if_flags & IFF_RUNNING) &&
1769             (em_rxeof(adapter, adapter->rx_process_limit) != 0))
1770                 taskqueue_enqueue(adapter->tq, &adapter->rx_task);
1771         /* Reenable this interrupt */
1772         E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_RX);
1773         return;
1774 }
1775
1776 /*********************************************************************
1777  *
1778  *  MSIX Link Fast Interrupt Service routine
1779  *
1780  **********************************************************************/
1781
1782 static void
1783 em_msix_link(void *arg)
1784 {
1785         struct adapter  *adapter = arg;
1786         u32             reg_icr;
1787
1788         ++adapter->link_irq;
1789         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1790
1791         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1792                 adapter->hw.mac.get_link_status = 1;
1793                 taskqueue_enqueue(taskqueue_swi, &adapter->link_task);
1794         }
1795         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1796             EM_MSIX_LINK | E1000_IMS_LSC);
1797         return;
1798 }
1799
1800 static void
1801 em_handle_rx(void *context, int pending)
1802 {
1803         struct adapter  *adapter = context;
1804         struct ifnet    *ifp = adapter->ifp;
1805
1806         if ((ifp->if_flags & IFF_RUNNING) &&
1807             (em_rxeof(adapter, adapter->rx_process_limit) != 0))
1808                 taskqueue_enqueue(adapter->tq, &adapter->rx_task);
1809
1810 }
1811
1812 static void
1813 em_handle_tx(void *context, int pending)
1814 {
1815         struct adapter  *adapter = context;
1816         struct ifnet    *ifp = adapter->ifp;
1817
1818         if (ifp->if_flags & IFF_RUNNING) {
1819                 if (!EM_TX_TRYLOCK(adapter))
1820                         return;
1821                 em_txeof(adapter);
1822 #if 0
1823                 if (!drbr_empty(ifp, adapter->br))
1824                         em_mq_start_locked(ifp, NULL);
1825 #else
1826                 if (!ifq_is_empty(&ifp->if_snd))
1827                         em_start_locked(ifp);
1828 #endif
1829                 EM_TX_UNLOCK(adapter);
1830         }
1831 }
1832 #endif /* EM_FAST_IRQ */
1833
1834 /*********************************************************************
1835  *
1836  *  Media Ioctl callback
1837  *
1838  *  This routine is called whenever the user queries the status of
1839  *  the interface using ifconfig.
1840  *
1841  **********************************************************************/
1842 static void
1843 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1844 {
1845         struct adapter *adapter = ifp->if_softc;
1846         u_char fiber_type = IFM_1000_SX;
1847
1848         INIT_DEBUGOUT("em_media_status: begin");
1849
1850         EM_CORE_LOCK(adapter);
1851         em_update_link_status(adapter);
1852
1853         ifmr->ifm_status = IFM_AVALID;
1854         ifmr->ifm_active = IFM_ETHER;
1855
1856         if (!adapter->link_active) {
1857                 EM_CORE_UNLOCK(adapter);
1858                 return;
1859         }
1860
1861         ifmr->ifm_status |= IFM_ACTIVE;
1862
1863         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1864             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1865                 if (adapter->hw.mac.type == e1000_82545)
1866                         fiber_type = IFM_1000_LX;
1867                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1868         } else {
1869                 switch (adapter->link_speed) {
1870                 case 10:
1871                         ifmr->ifm_active |= IFM_10_T;
1872                         break;
1873                 case 100:
1874                         ifmr->ifm_active |= IFM_100_TX;
1875                         break;
1876                 case 1000:
1877                         ifmr->ifm_active |= IFM_1000_T;
1878                         break;
1879                 }
1880                 if (adapter->link_duplex == FULL_DUPLEX)
1881                         ifmr->ifm_active |= IFM_FDX;
1882                 else
1883                         ifmr->ifm_active |= IFM_HDX;
1884         }
1885         EM_CORE_UNLOCK(adapter);
1886 }
1887
1888 /*********************************************************************
1889  *
1890  *  Media Ioctl callback
1891  *
1892  *  This routine is called when the user changes speed/duplex using
1893  *  media/mediopt option with ifconfig.
1894  *
1895  **********************************************************************/
1896 static int
1897 em_media_change(struct ifnet *ifp)
1898 {
1899         struct adapter *adapter = ifp->if_softc;
1900         struct ifmedia  *ifm = &adapter->media;
1901
1902         INIT_DEBUGOUT("em_media_change: begin");
1903
1904         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1905                 return (EINVAL);
1906
1907         EM_CORE_LOCK(adapter);
1908         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1909         case IFM_AUTO:
1910                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1911                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1912                 break;
1913         case IFM_1000_LX:
1914         case IFM_1000_SX:
1915         case IFM_1000_T:
1916                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1917                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1918                 break;
1919         case IFM_100_TX:
1920                 adapter->hw.mac.autoneg = FALSE;
1921                 adapter->hw.phy.autoneg_advertised = 0;
1922                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1923                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1924                 else
1925                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1926                 break;
1927         case IFM_10_T:
1928                 adapter->hw.mac.autoneg = FALSE;
1929                 adapter->hw.phy.autoneg_advertised = 0;
1930                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1931                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1932                 else
1933                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1934                 break;
1935         default:
1936                 device_printf(adapter->dev, "Unsupported media type\n");
1937         }
1938
1939         /* As the speed/duplex settings my have changed we need to
1940          * reset the PHY.
1941          */
1942         adapter->hw.phy.reset_disable = FALSE;
1943
1944         em_init_locked(adapter);
1945         EM_CORE_UNLOCK(adapter);
1946
1947         return (0);
1948 }
1949
1950 /*********************************************************************
1951  *
1952  *  This routine maps the mbufs to tx descriptors.
1953  *
1954  *  return 0 on success, positive on failure
1955  **********************************************************************/
1956
1957 static int
1958 em_xmit(struct adapter *adapter, struct mbuf **m_headp)
1959 {
1960         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1961         bus_dmamap_t            map;
1962         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1963         struct e1000_tx_desc    *ctxd = NULL;
1964         struct mbuf             *m_head;
1965         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1966         int                     nsegs, i, j, first, last = 0;
1967         int                     error, do_tso, tso_desc = 0;
1968
1969         m_head = *m_headp;
1970         txd_upper = txd_lower = txd_used = txd_saved = 0;
1971
1972 #ifdef NET_TSO
1973         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1974 #else
1975         do_tso = 0;
1976 #endif
1977
1978         /*
1979          * Force a cleanup if number of TX descriptors
1980          * available hits the threshold
1981          */
1982         if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1983                 em_txeof(adapter);
1984                 /* Now do we at least have a minimal? */
1985                 if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1986                         adapter->no_tx_desc_avail1++;
1987                         return (ENOBUFS);
1988                 }
1989         }
1990
1991
1992         /*
1993          * TSO workaround: 
1994          *  If an mbuf is only header we need  
1995          *     to pull 4 bytes of data into it. 
1996          */
1997         if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1998                 m_head = m_pullup(m_head, M_TSO_LEN + 4);
1999                 *m_headp = m_head;
2000                 if (m_head == NULL)
2001                         return (ENOBUFS);
2002         }
2003
2004         /*
2005          * Map the packet for DMA
2006          *
2007          * Capture the first descriptor index,
2008          * this descriptor will have the index
2009          * of the EOP which is the only one that
2010          * now gets a DONE bit writeback.
2011          */
2012         first = adapter->next_avail_tx_desc;
2013         tx_buffer = &adapter->tx_buffer_area[first];
2014         tx_buffer_mapped = tx_buffer;
2015         map = tx_buffer->map;
2016
2017         error = bus_dmamap_load_mbuf_segment(adapter->txtag, map,
2018             *m_headp, segs, EM_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
2019
2020         /*
2021          * There are two types of errors we can (try) to handle:
2022          * - EFBIG means the mbuf chain was too long and bus_dma ran
2023          *   out of segments.  Defragment the mbuf chain and try again.
2024          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2025          *   at this point in time.  Defer sending and try again later.
2026          * All other errors, in particular EINVAL, are fatal and prevent the
2027          * mbuf chain from ever going through.  Drop it and report error.
2028          */
2029         if (error == EFBIG) {
2030                 struct mbuf *m;
2031
2032                 m = m_defrag(*m_headp, MB_DONTWAIT);
2033                 if (m == NULL) {
2034                         adapter->mbuf_alloc_failed++;
2035                         m_freem(*m_headp);
2036                         *m_headp = NULL;
2037                         return (ENOBUFS);
2038                 }
2039                 *m_headp = m;
2040
2041                 /* Try it again */
2042                 error = bus_dmamap_load_mbuf_segment(adapter->txtag, map,
2043                     *m_headp, segs, EM_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
2044
2045                 if (error) {
2046                         adapter->no_tx_dma_setup++;
2047                         m_freem(*m_headp);
2048                         *m_headp = NULL;
2049                         return (error);
2050                 }
2051         } else if (error != 0) {
2052                 adapter->no_tx_dma_setup++;
2053                 return (error);
2054         }
2055
2056         /*
2057          * TSO Hardware workaround, if this packet is not
2058          * TSO, and is only a single descriptor long, and
2059          * it follows a TSO burst, then we need to add a
2060          * sentinel descriptor to prevent premature writeback.
2061          */
2062         if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
2063                 if (nsegs == 1)
2064                         tso_desc = TRUE;
2065                 adapter->tx_tso = FALSE;
2066         }
2067
2068         if (nsegs > (adapter->num_tx_desc_avail - 2)) {
2069                 adapter->no_tx_desc_avail2++;
2070                 bus_dmamap_unload(adapter->txtag, map);
2071                 return (ENOBUFS);
2072         }
2073         m_head = *m_headp;
2074
2075         /* Do hardware assists */
2076 #ifdef NET_TSO
2077         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2078                 error = em_tso_setup(adapter, m_head, &txd_upper, &txd_lower);
2079                 if (error != TRUE)
2080                         return (ENXIO); /* something foobar */
2081                 /* we need to make a final sentinel transmit desc */
2082                 tso_desc = TRUE;
2083         } else
2084 #endif
2085         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2086                 em_transmit_checksum_setup(adapter,  m_head,
2087                     &txd_upper, &txd_lower);
2088
2089         i = adapter->next_avail_tx_desc;
2090         if (adapter->pcix_82544) 
2091                 txd_saved = i;
2092
2093         /* Set up our transmit descriptors */
2094         for (j = 0; j < nsegs; j++) {
2095                 bus_size_t seg_len;
2096                 bus_addr_t seg_addr;
2097                 /* If adapter is 82544 and on PCIX bus */
2098                 if(adapter->pcix_82544) {
2099                         DESC_ARRAY      desc_array;
2100                         u32             array_elements, counter;
2101                         /*
2102                          * Check the Address and Length combination and
2103                          * split the data accordingly
2104                          */
2105                         array_elements = em_fill_descriptors(segs[j].ds_addr,
2106                             segs[j].ds_len, &desc_array);
2107                         for (counter = 0; counter < array_elements; counter++) {
2108                                 if (txd_used == adapter->num_tx_desc_avail) {
2109                                         adapter->next_avail_tx_desc = txd_saved;
2110                                         adapter->no_tx_desc_avail2++;
2111                                         bus_dmamap_unload(adapter->txtag, map);
2112                                         return (ENOBUFS);
2113                                 }
2114                                 tx_buffer = &adapter->tx_buffer_area[i];
2115                                 ctxd = &adapter->tx_desc_base[i];
2116                                 ctxd->buffer_addr = htole64(
2117                                     desc_array.descriptor[counter].address);
2118                                 ctxd->lower.data = htole32(
2119                                     (adapter->txd_cmd | txd_lower | (u16)
2120                                     desc_array.descriptor[counter].length));
2121                                 ctxd->upper.data =
2122                                     htole32((txd_upper));
2123                                 last = i;
2124                                 if (++i == adapter->num_tx_desc)
2125                                          i = 0;
2126                                 tx_buffer->m_head = NULL;
2127                                 tx_buffer->next_eop = -1;
2128                                 txd_used++;
2129                         }
2130                 } else {
2131                         tx_buffer = &adapter->tx_buffer_area[i];
2132                         ctxd = &adapter->tx_desc_base[i];
2133                         seg_addr = segs[j].ds_addr;
2134                         seg_len  = segs[j].ds_len;
2135                         /*
2136                         ** TSO Workaround:
2137                         ** If this is the last descriptor, we want to
2138                         ** split it so we have a small final sentinel
2139                         */
2140                         if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2141                                 seg_len -= 4;
2142                                 ctxd->buffer_addr = htole64(seg_addr);
2143                                 ctxd->lower.data = htole32(
2144                                 adapter->txd_cmd | txd_lower | seg_len);
2145                                 ctxd->upper.data =
2146                                     htole32(txd_upper);
2147                                 if (++i == adapter->num_tx_desc)
2148                                         i = 0;
2149                                 /* Now make the sentinel */     
2150                                 ++txd_used; /* using an extra txd */
2151                                 ctxd = &adapter->tx_desc_base[i];
2152                                 tx_buffer = &adapter->tx_buffer_area[i];
2153                                 ctxd->buffer_addr =
2154                                     htole64(seg_addr + seg_len);
2155                                 ctxd->lower.data = htole32(
2156                                 adapter->txd_cmd | txd_lower | 4);
2157                                 ctxd->upper.data =
2158                                     htole32(txd_upper);
2159                                 last = i;
2160                                 if (++i == adapter->num_tx_desc)
2161                                         i = 0;
2162                         } else {
2163                                 ctxd->buffer_addr = htole64(seg_addr);
2164                                 ctxd->lower.data = htole32(
2165                                 adapter->txd_cmd | txd_lower | seg_len);
2166                                 ctxd->upper.data =
2167                                     htole32(txd_upper);
2168                                 last = i;
2169                                 if (++i == adapter->num_tx_desc)
2170                                         i = 0;
2171                         }
2172                         tx_buffer->m_head = NULL;
2173                         tx_buffer->next_eop = -1;
2174                 }
2175         }
2176
2177         adapter->next_avail_tx_desc = i;
2178         if (adapter->pcix_82544)
2179                 adapter->num_tx_desc_avail -= txd_used;
2180         else {
2181                 adapter->num_tx_desc_avail -= nsegs;
2182                 if (tso_desc) /* TSO used an extra for sentinel */
2183                         adapter->num_tx_desc_avail -= txd_used;
2184         }
2185
2186         /*
2187         ** Handle VLAN tag
2188         */
2189         if (m_head->m_flags & M_VLANTAG) {
2190                 /* Set the vlan id. */
2191                 ctxd->upper.fields.special =
2192                     htole16(m_head->m_pkthdr.ether_vlantag);
2193                 /* Tell hardware to add tag */
2194                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2195         }
2196
2197         tx_buffer->m_head = m_head;
2198         tx_buffer_mapped->map = tx_buffer->map;
2199         tx_buffer->map = map;
2200         bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
2201
2202         /*
2203          * Last Descriptor of Packet
2204          * needs End Of Packet (EOP)
2205          * and Report Status (RS)
2206          */
2207         ctxd->lower.data |=
2208             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2209         /*
2210          * Keep track in the first buffer which
2211          * descriptor will be written back
2212          */
2213         tx_buffer = &adapter->tx_buffer_area[first];
2214         tx_buffer->next_eop = last;
2215         adapter->watchdog_time = ticks;
2216
2217         /*
2218          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2219          * that this frame is available to transmit.
2220          */
2221         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2222             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2223         if (adapter->hw.mac.type == e1000_82547 &&
2224             adapter->link_duplex == HALF_DUPLEX)
2225                 em_82547_move_tail(adapter);
2226         else {
2227                 E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i);
2228                 if (adapter->hw.mac.type == e1000_82547)
2229                         em_82547_update_fifo_head(adapter,
2230                             m_head->m_pkthdr.len);
2231         }
2232
2233         return (0);
2234 }
2235
2236 /*********************************************************************
2237  *
2238  * 82547 workaround to avoid controller hang in half-duplex environment.
2239  * The workaround is to avoid queuing a large packet that would span
2240  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2241  * in this case. We do that only when FIFO is quiescent.
2242  *
2243  **********************************************************************/
2244 static void
2245 em_82547_move_tail_locked(void *arg)
2246 {
2247         struct adapter *adapter = arg;
2248
2249         struct e1000_tx_desc *tx_desc;
2250         u16     hw_tdt, sw_tdt, length = 0;
2251         bool    eop = 0;
2252
2253         EM_TX_LOCK_ASSERT(adapter);
2254
2255         hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0));
2256         sw_tdt = adapter->next_avail_tx_desc;
2257         
2258         while (hw_tdt != sw_tdt) {
2259                 tx_desc = &adapter->tx_desc_base[hw_tdt];
2260                 length += tx_desc->lower.flags.length;
2261                 eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
2262                 if (++hw_tdt == adapter->num_tx_desc)
2263                         hw_tdt = 0;
2264
2265                 if (eop) {
2266                         if (em_82547_fifo_workaround(adapter, length)) {
2267                                 adapter->tx_fifo_wrk_cnt++;
2268                                 callout_reset(&adapter->tx_fifo_timer, 1,
2269                                         em_82547_move_tail, adapter);
2270                                 break;
2271                         }
2272                         E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt);
2273                         em_82547_update_fifo_head(adapter, length);
2274                         length = 0;
2275                 }
2276         }       
2277 }
2278
/*
 * Locked wrapper around em_82547_move_tail_locked(): runs it with
 * the TX lock held.  arg is the driver softc.
 */
static void
em_82547_move_tail(void *arg)
{
	struct adapter *sc = arg;

	EM_TX_LOCK(sc);
	em_82547_move_tail_locked(sc);
	EM_TX_UNLOCK(sc);
}
2287
2288 static int
2289 em_82547_fifo_workaround(struct adapter *adapter, int len)
2290 {       
2291         int fifo_space, fifo_pkt_len;
2292
2293         fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2294
2295         if (adapter->link_duplex == HALF_DUPLEX) {
2296                 fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2297
2298                 if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2299                         if (em_82547_tx_fifo_reset(adapter))
2300                                 return (0);
2301                         else
2302                                 return (1);
2303                 }
2304         }
2305
2306         return (0);
2307 }
2308
2309 static void
2310 em_82547_update_fifo_head(struct adapter *adapter, int len)
2311 {
2312         int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2313         
2314         /* tx_fifo_head is always 16 byte aligned */
2315         adapter->tx_fifo_head += fifo_pkt_len;
2316         if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2317                 adapter->tx_fifo_head -= adapter->tx_fifo_size;
2318         }
2319 }
2320
2321
2322 static int
2323 em_82547_tx_fifo_reset(struct adapter *adapter)
2324 {
2325         u32 tctl;
2326
2327         if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) ==
2328             E1000_READ_REG(&adapter->hw, E1000_TDH(0))) &&
2329             (E1000_READ_REG(&adapter->hw, E1000_TDFT) == 
2330             E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
2331             (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
2332             E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
2333             (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
2334                 /* Disable TX unit */
2335                 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2336                 E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
2337                     tctl & ~E1000_TCTL_EN);
2338
2339                 /* Reset FIFO pointers */
2340                 E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
2341                     adapter->tx_head_addr);
2342                 E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
2343                     adapter->tx_head_addr);
2344                 E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
2345                     adapter->tx_head_addr);
2346                 E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
2347                     adapter->tx_head_addr);
2348
2349                 /* Re-enable TX unit */
2350                 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2351                 E1000_WRITE_FLUSH(&adapter->hw);
2352
2353                 adapter->tx_fifo_head = 0;
2354                 adapter->tx_fifo_reset_cnt++;
2355
2356                 return (TRUE);
2357         }
2358         else {
2359                 return (FALSE);
2360         }
2361 }
2362
2363 static void
2364 em_set_promisc(struct adapter *adapter)
2365 {
2366         struct ifnet    *ifp = adapter->ifp;
2367         u32             reg_rctl;
2368
2369         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2370
2371         if (ifp->if_flags & IFF_PROMISC) {
2372                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2373                 /* Turn this on if you want to see bad packets */
2374                 if (em_debug_sbp)
2375                         reg_rctl |= E1000_RCTL_SBP;
2376                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2377         } else if (ifp->if_flags & IFF_ALLMULTI) {
2378                 reg_rctl |= E1000_RCTL_MPE;
2379                 reg_rctl &= ~E1000_RCTL_UPE;
2380                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2381         }
2382 }
2383
2384 static void
2385 em_disable_promisc(struct adapter *adapter)
2386 {
2387         u32     reg_rctl;
2388
2389         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2390
2391         reg_rctl &=  (~E1000_RCTL_UPE);
2392         reg_rctl &=  (~E1000_RCTL_MPE);
2393         reg_rctl &=  (~E1000_RCTL_SBP);
2394         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2395 }
2396
2397
2398 /*********************************************************************
2399  *  Multicast Update
2400  *
2401  *  This routine is called whenever multicast address list is updated.
2402  *
2403  **********************************************************************/
2404
2405 static void
2406 em_set_multi(struct adapter *adapter)
2407 {
2408         struct ifnet    *ifp = adapter->ifp;
2409         struct ifmultiaddr *ifma;
2410         u32 reg_rctl = 0;
2411         u8  *mta; /* Multicast array memory */
2412         int mcnt = 0;
2413
2414         IOCTL_DEBUGOUT("em_set_multi: begin");
2415
2416         if (adapter->hw.mac.type == e1000_82542 && 
2417             adapter->hw.revision_id == E1000_REVISION_2) {
2418                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2419                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2420                         e1000_pci_clear_mwi(&adapter->hw);
2421                 reg_rctl |= E1000_RCTL_RST;
2422                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2423                 msec_delay(5);
2424         }
2425
2426         /* Allocate temporary memory to setup array */
2427         mta = kmalloc(sizeof(u8) *
2428             (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2429             M_DEVBUF, M_NOWAIT | M_ZERO);
2430         if (mta == NULL)
2431                 panic("em_set_multi memory failure\n");
2432
2433 #if 0
2434 #if __FreeBSD_version < 800000
2435         IF_ADDR_LOCK(ifp);
2436 #else
2437         if_maddr_rlock(ifp);
2438 #endif
2439 #endif
2440         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2441                 if (ifma->ifma_addr->sa_family != AF_LINK)
2442                         continue;
2443
2444                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2445                         break;
2446
2447                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2448                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2449                 mcnt++;
2450         }
2451 #if 0
2452 #if __FreeBSD_version < 800000
2453         IF_ADDR_UNLOCK(ifp);
2454 #else
2455         if_maddr_runlock(ifp);
2456 #endif
2457 #endif
2458         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2459                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2460                 reg_rctl |= E1000_RCTL_MPE;
2461                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2462         } else
2463                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2464
2465         if (adapter->hw.mac.type == e1000_82542 && 
2466             adapter->hw.revision_id == E1000_REVISION_2) {
2467                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2468                 reg_rctl &= ~E1000_RCTL_RST;
2469                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2470                 msec_delay(5);
2471                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2472                         e1000_pci_set_mwi(&adapter->hw);
2473         }
2474         kfree(mta, M_DEVBUF);
2475 }
2476
2477
2478 /*********************************************************************
2479  *  Timer routine
2480  *
2481  *  This routine checks for link status and updates statistics.
2482  *
2483  **********************************************************************/
2484
2485 static void
2486 em_local_timer_locked(void *arg)
2487 {
2488         struct adapter  *adapter = arg;
2489         struct ifnet    *ifp = adapter->ifp;
2490
2491         EM_CORE_LOCK_ASSERT(adapter);
2492
2493 #ifndef DEVICE_POLLING
2494         taskqueue_enqueue(adapter->tq,
2495             &adapter->rxtx_task);
2496 #endif
2497         em_update_link_status(adapter);
2498         em_update_stats_counters(adapter);
2499
2500         /* Reset LAA into RAR[0] on 82571 */
2501         if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2502                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2503
2504         if (em_display_debug_stats && ifp->if_flags & IFF_RUNNING)
2505                 em_print_hw_stats(adapter);
2506
2507         em_smartspeed(adapter);
2508
2509         /*
2510          * We check the watchdog: the time since
2511          * the last TX descriptor was cleaned.
2512          * This implies a functional TX engine.
2513          */
2514         if ((adapter->watchdog_check == TRUE) &&
2515             (ticks - adapter->watchdog_time > EM_WATCHDOG))
2516                 goto hung;
2517
2518         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2519         return;
2520 hung:
2521         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2522         adapter->ifp->if_flags &= ~IFF_RUNNING;
2523         adapter->watchdog_events++;
2524         em_init_locked(adapter);
2525 }
2526
/*
 * Unlocked wrapper used as the timer callout handler: takes the CORE
 * lock around em_local_timer_locked().
 */
static void
em_local_timer(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_local_timer_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}
2536
2537
2538 static void
2539 em_update_link_status(struct adapter *adapter)
2540 {
2541         struct e1000_hw *hw = &adapter->hw;
2542         struct ifnet *ifp = adapter->ifp;
2543         device_t dev = adapter->dev;
2544         u32 link_check = 0;
2545
2546         /* Get the cached link value or read phy for real */
2547         switch (hw->phy.media_type) {
2548         case e1000_media_type_copper:
2549                 if (hw->mac.get_link_status) {
2550                         /* Do the work to read phy */
2551                         e1000_check_for_link(hw);
2552                         link_check = !hw->mac.get_link_status;
2553                         if (link_check) /* ESB2 fix */
2554                                 e1000_cfg_on_link_up(hw);
2555                 } else
2556                         link_check = TRUE;
2557                 break;
2558         case e1000_media_type_fiber:
2559                 e1000_check_for_link(hw);
2560                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2561                                  E1000_STATUS_LU);
2562                 break;
2563         case e1000_media_type_internal_serdes:
2564                 e1000_check_for_link(hw);
2565                 link_check = adapter->hw.mac.serdes_has_link;
2566                 break;
2567         default:
2568         case e1000_media_type_unknown:
2569                 break;
2570         }
2571
2572         /* Now check for a transition */
2573         if (link_check && (adapter->link_active == 0)) {
2574                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2575                     &adapter->link_duplex);
2576                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2577                 if ((adapter->link_speed != SPEED_1000) &&
2578                     ((hw->mac.type == e1000_82571) ||
2579                     (hw->mac.type == e1000_82572))) {
2580                         int tarc0;
2581                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2582                         tarc0 &= ~SPEED_MODE_BIT;
2583                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2584                 }
2585                 if (bootverbose)
2586                         device_printf(dev, "Link is up %d Mbps %s\n",
2587                             adapter->link_speed,
2588                             ((adapter->link_duplex == FULL_DUPLEX) ?
2589                             "Full Duplex" : "Half Duplex"));
2590                 adapter->link_active = 1;
2591                 adapter->smartspeed = 0;
2592                 ifp->if_baudrate = adapter->link_speed * 1000000;
2593                 ifp->if_link_state = LINK_STATE_UP;
2594                 if_link_state_change(ifp);
2595         } else if (!link_check && (adapter->link_active == 1)) {
2596                 ifp->if_baudrate = adapter->link_speed = 0;
2597                 adapter->link_duplex = 0;
2598                 if (bootverbose)
2599                         device_printf(dev, "Link is Down\n");
2600                 adapter->link_active = 0;
2601                 /* Link down, disable watchdog */
2602                 adapter->watchdog_check = FALSE;
2603                 ifp->if_link_state = LINK_STATE_DOWN;
2604                 if_link_state_change(ifp);
2605         }
2606 }
2607
2608 /*********************************************************************
2609  *
2610  *  This routine disables all traffic on the adapter by issuing a
2611  *  global reset on the MAC and deallocates TX/RX buffers.
2612  *
2613  *  This routine should always be called with BOTH the CORE
2614  *  and TX locks.
2615  **********************************************************************/
2616
2617 static void
2618 em_stop(void *arg)
2619 {
2620         struct adapter  *adapter = arg;
2621         struct ifnet    *ifp = adapter->ifp;
2622
2623         EM_CORE_LOCK_ASSERT(adapter);
2624         EM_TX_LOCK_ASSERT(adapter);
2625
2626         INIT_DEBUGOUT("em_stop: begin");
2627
2628         em_disable_intr(adapter);
2629         callout_stop(&adapter->timer);
2630         callout_stop(&adapter->tx_fifo_timer);
2631
2632         /* Tell the stack that the interface is no longer active */
2633         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2634
2635         e1000_reset_hw(&adapter->hw);
2636         if (adapter->hw.mac.type >= e1000_82544)
2637                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2638 }
2639
2640
2641 /*********************************************************************
2642  *
2643  *  Determine hardware revision.
2644  *
2645  **********************************************************************/
2646 static void
2647 em_identify_hardware(struct adapter *adapter)
2648 {
2649         device_t dev = adapter->dev;
2650
2651         /* Make sure our PCI config space has the necessary stuff set */
2652         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2653         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2654             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2655                 device_printf(dev, "Memory Access and/or Bus Master bits "
2656                     "were not set!\n");
2657                 adapter->hw.bus.pci_cmd_word |=
2658                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2659                 pci_write_config(dev, PCIR_COMMAND,
2660                     adapter->hw.bus.pci_cmd_word, 2);
2661         }
2662
2663         /* Save off the information about this board */
2664         adapter->hw.vendor_id = pci_get_vendor(dev);
2665         adapter->hw.device_id = pci_get_device(dev);
2666         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2667         adapter->hw.subsystem_vendor_id =
2668             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2669         adapter->hw.subsystem_device_id =
2670             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2671
2672         /* Do Shared Code Init and Setup */
2673         if (e1000_set_mac_type(&adapter->hw)) {
2674                 device_printf(dev, "Setup init failure\n");
2675                 return;
2676         }
2677 }
2678
2679 static int
2680 em_allocate_pci_resources(struct adapter *adapter)
2681 {
2682         device_t        dev = adapter->dev;
2683         int             val, rid, error = E1000_SUCCESS;
2684
2685         rid = PCIR_BAR(0);
2686         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2687             &rid, RF_ACTIVE);
2688         if (adapter->memory == NULL) {
2689                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2690                 return (ENXIO);
2691         }
2692         adapter->osdep.mem_bus_space_tag =
2693             rman_get_bustag(adapter->memory);
2694         adapter->osdep.mem_bus_space_handle =
2695             rman_get_bushandle(adapter->memory);
2696         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2697
2698         /* Only older adapters use IO mapping */
2699         if ((adapter->hw.mac.type > e1000_82543) &&
2700             (adapter->hw.mac.type < e1000_82571)) {
2701                 /* Figure our where our IO BAR is ? */
2702                 for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2703                         val = pci_read_config(dev, rid, 4);
2704                         if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2705                                 adapter->io_rid = rid;
2706                                 break;
2707                         }
2708                         rid += 4;
2709                         /* check for 64bit BAR */
2710                         if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2711                                 rid += 4;
2712                 }
2713                 if (rid >= PCIR_CIS) {
2714                         device_printf(dev, "Unable to locate IO BAR\n");
2715                         return (ENXIO);
2716                 }
2717                 adapter->ioport = bus_alloc_resource_any(dev,
2718                     SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
2719                 if (adapter->ioport == NULL) {
2720                         device_printf(dev, "Unable to allocate bus resource: "
2721                             "ioport\n");
2722                         return (ENXIO);
2723                 }
2724                 adapter->hw.io_base = 0;
2725                 adapter->osdep.io_bus_space_tag =
2726                     rman_get_bustag(adapter->ioport);
2727                 adapter->osdep.io_bus_space_handle =
2728                     rman_get_bushandle(adapter->ioport);
2729         }
2730
2731         /*
2732         ** Init the resource arrays
2733         **  used by MSIX setup 
2734         */
2735         for (int i = 0; i < 3; i++) {
2736                 adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
2737                 adapter->tag[i] = NULL;
2738                 adapter->res[i] = NULL;
2739         }
2740
2741         /*
2742          * Setup MSI/X or MSI if PCI Express
2743          */
2744         if (em_enable_msi)
2745                 adapter->msi = em_setup_msix(adapter);
2746
2747         adapter->hw.back = &adapter->osdep;
2748
2749         return (error);
2750 }
2751
2752 /*********************************************************************
2753  *
2754  *  Setup the Legacy or MSI Interrupt handler
2755  *
2756  **********************************************************************/
2757 int
2758 em_allocate_legacy(struct adapter *adapter)
2759 {
2760         device_t dev = adapter->dev;
2761         int error;
2762
2763         /* Manually turn off all interrupts */
2764         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2765
2766         /* Legacy RID is 0 */
2767         if (adapter->msi == 0)
2768                 adapter->rid[0] = 0;
2769
2770         /* We allocate a single interrupt resource */
2771         adapter->res[0] = bus_alloc_resource_any(dev,
2772             SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
2773         if (adapter->res[0] == NULL) {
2774                 device_printf(dev, "Unable to allocate bus resource: "
2775                     "interrupt\n");
2776                 return (ENXIO);
2777         }
2778
2779 #ifdef EM_LEGACY_IRQ
2780         /* We do Legacy setup */
2781         if ((error = bus_setup_intr(dev, adapter->res[0],
2782             /*INTR_TYPE_NET |*/ INTR_MPSAFE, em_intr, adapter,
2783             &adapter->tag[0], NULL)) != 0) {
2784                 device_printf(dev, "Failed to register interrupt handler");
2785                 return (error);
2786         }
2787
2788 #else /* FAST_IRQ */
2789         /*
2790          * Try allocating a fast interrupt and the associated deferred
2791          * processing contexts.
2792          */
2793         TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2794         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2795         adapter->tq = taskqueue_create("em_taskq", M_NOWAIT,
2796             taskqueue_thread_enqueue, &adapter->tq);
2797         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2798             device_get_nameunit(adapter->dev));
2799         if ((error = bus_setup_intr(dev, adapter->res[0],
2800             /*INTR_TYPE_NET |*/ INTR_FAST, em_irq_fast, adapter,
2801             &adapter->tag[0], NULL)) != 0) {
2802                 device_printf(dev, "Failed to register fast interrupt "
2803                             "handler: %d\n", error);
2804                 taskqueue_free(adapter->tq);
2805                 adapter->tq = NULL;
2806                 return (error);
2807         }
2808 #endif  /* EM_LEGACY_IRQ */
2809         
2810         return (0);
2811 }
2812
2813 /*********************************************************************
2814  *
2815  *  Setup the MSIX Interrupt handlers
2816  *   This is not really Multiqueue, rather
2817  *   its just multiple interrupt vectors.
2818  *
2819  **********************************************************************/
2820 int
2821 em_allocate_msix(struct adapter *adapter)
2822 {
2823         device_t dev = adapter->dev;
2824         int error;
2825
2826         /* Make sure all interrupts are disabled */
2827         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2828
2829         /* First get the resources */
2830         for (int i = 0; i < adapter->msi; i++) {
2831                 adapter->res[i] = bus_alloc_resource_any(dev,
2832                     SYS_RES_IRQ, &adapter->rid[i], RF_ACTIVE);
2833                 if (adapter->res[i] == NULL) {
2834                         device_printf(dev,
2835                             "Unable to allocate bus resource: "
2836                             "MSIX Interrupt\n");
2837                         return (ENXIO);
2838                 }
2839         }
2840
2841         /*
2842          * Now allocate deferred processing contexts.
2843          */
2844         TASK_INIT(&adapter->rx_task, 0, em_handle_rx, adapter);
2845         TASK_INIT(&adapter->tx_task, 0, em_handle_tx, adapter);
2846         /*
2847          * Handle compatibility for msi case for deferral due to
2848          * trylock failure
2849          */
2850         TASK_INIT(&adapter->rxtx_task, 0, em_handle_tx, adapter);
2851         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2852         adapter->tq = taskqueue_create("em_taskq", M_NOWAIT,
2853             taskqueue_thread_enqueue, &adapter->tq);
2854         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2855             device_get_nameunit(adapter->dev));
2856
2857         /*
2858          * And setup the interrupt handlers
2859          */
2860
2861         /* First slot to RX */
2862         if ((error = bus_setup_intr(dev, adapter->res[0],
2863             /*INTR_TYPE_NET |*/ INTR_MPSAFE, em_msix_rx, adapter,
2864             &adapter->tag[0], NULL)) != 0) {
2865                 device_printf(dev, "Failed to register RX handler");
2866                 return (error);
2867         }
2868
2869         /* Next TX */
2870         if ((error = bus_setup_intr(dev, adapter->res[1],
2871             /*INTR_TYPE_NET |*/ INTR_MPSAFE, em_msix_tx, adapter,
2872             &adapter->tag[1], NULL)) != 0) {
2873                 device_printf(dev, "Failed to register TX handler");
2874                 return (error);
2875         }
2876
2877         /* And Link */
2878         if ((error = bus_setup_intr(dev, adapter->res[2],
2879             /*INTR_TYPE_NET |*/ INTR_MPSAFE, em_msix_link, adapter,
2880             &adapter->tag[2], NULL)) != 0) {
2881                 device_printf(dev, "Failed to register TX handler");
2882                 return (error);
2883         }
2884
2885         return (0);
2886 }
2887
2888
2889 static void
2890 em_free_pci_resources(struct adapter *adapter)
2891 {
2892         device_t dev = adapter->dev;
2893
2894         /* Make sure the for loop below runs once */
2895         if (adapter->msi == 0)
2896                 adapter->msi = 1;
2897
2898         /*
2899          * First release all the interrupt resources:
2900          *      notice that since these are just kept
2901          *      in an array we can do the same logic
2902          *      whether its MSIX or just legacy.
2903          */
2904         for (int i = 0; i < adapter->msi; i++) {
2905                 if (adapter->tag[i] != NULL) {
2906                         bus_teardown_intr(dev, adapter->res[i],
2907                             adapter->tag[i]);
2908                         adapter->tag[i] = NULL;
2909                 }
2910                 if (adapter->res[i] != NULL) {
2911                         bus_release_resource(dev, SYS_RES_IRQ,
2912                             adapter->rid[i], adapter->res[i]);
2913                 }
2914         }
2915
2916         if (adapter->msi)
2917                 pci_release_msi(dev);
2918
2919         if (adapter->msix != NULL)
2920                 bus_release_resource(dev, SYS_RES_MEMORY,
2921                     PCIR_BAR(EM_MSIX_BAR), adapter->msix);
2922
2923         if (adapter->memory != NULL)
2924                 bus_release_resource(dev, SYS_RES_MEMORY,
2925                     PCIR_BAR(0), adapter->memory);
2926
2927         if (adapter->flash != NULL)
2928                 bus_release_resource(dev, SYS_RES_MEMORY,
2929                     EM_FLASH, adapter->flash);
2930
2931         if (adapter->ioport != NULL)
2932                 bus_release_resource(dev, SYS_RES_IOPORT,
2933                     adapter->io_rid, adapter->ioport);
2934 }
2935
2936 /*
2937  * Setup MSI or MSI/X
2938  */
2939 static int
2940 em_setup_msix(struct adapter *adapter)
2941 {
2942         device_t dev = adapter->dev;
2943         int val = 0;
2944
2945         if (adapter->hw.mac.type < e1000_82571)
2946                 return (0);
2947
2948         /* Setup MSI/X for Hartwell */
2949         if (adapter->hw.mac.type == e1000_82574) {
2950                 /* Map the MSIX BAR */
2951                 int rid = PCIR_BAR(EM_MSIX_BAR);
2952                 adapter->msix = bus_alloc_resource_any(dev,
2953                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2954                 if (!adapter->msix) {
2955                         /* May not be enabled */
2956                         device_printf(adapter->dev,
2957                             "Unable to map MSIX table \n");
2958                         goto msi;
2959                 }
2960                 val = pci_msix_count(dev); 
2961                 /*
2962                 ** 82574 can be configured for 5 but
2963                 ** we limit use to 3.
2964                 */
2965                 if (val > 3) val = 3;
2966                 if ((val) && pci_alloc_msix(dev, &val) == 0) {
2967                         device_printf(adapter->dev,"Using MSIX interrupts\n");
2968                         return (val);
2969                 }
2970         }
2971 msi:
2972         val = pci_msi_count(dev);
2973         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2974                 adapter->msi = 1;
2975                 device_printf(adapter->dev,"Using MSI interrupt\n");
2976                 return (val);
2977         } 
2978         return (0);
2979 }
2980
2981 /*********************************************************************
2982  *
2983  *  Initialize the hardware to a configuration
2984  *  as specified by the adapter structure.
2985  *
2986  **********************************************************************/
2987 static int
2988 em_hardware_init(struct adapter *adapter)
2989 {
2990         device_t dev = adapter->dev;
2991         u16     rx_buffer_size;
2992
2993         INIT_DEBUGOUT("em_hardware_init: begin");
2994
2995         /* Issue a global reset */
2996         e1000_reset_hw(&adapter->hw);
2997
2998         /* When hardware is reset, fifo_head is also reset */
2999         adapter->tx_fifo_head = 0;
3000
3001         /* Set up smart power down as default off on newer adapters. */
3002         if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
3003             adapter->hw.mac.type == e1000_82572)) {
3004                 u16 phy_tmp = 0;
3005
3006                 /* Speed up time to link by disabling smart power down. */
3007                 e1000_read_phy_reg(&adapter->hw,
3008                     IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3009                 phy_tmp &= ~IGP02E1000_PM_SPD;
3010                 e1000_write_phy_reg(&adapter->hw,
3011                     IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3012         }
3013
3014         /*
3015          * These parameters control the automatic generation (Tx) and
3016          * response (Rx) to Ethernet PAUSE frames.
3017          * - High water mark should allow for at least two frames to be
3018          *   received after sending an XOFF.
3019          * - Low water mark works best when it is very near the high water mark.
3020          *   This allows the receiver to restart by sending XON when it has
3021          *   drained a bit. Here we use an arbitary value of 1500 which will
3022          *   restart after one full frame is pulled from the buffer. There
3023          *   could be several smaller frames in the buffer and if so they will
3024          *   not trigger the XON until their total number reduces the buffer
3025          *   by 1500.
3026          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3027          */
3028         rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
3029             0xffff) << 10 );
3030
3031         adapter->hw.fc.high_water = rx_buffer_size -
3032             roundup2(adapter->max_frame_size, 1024);
3033         adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
3034
3035         if (adapter->hw.mac.type == e1000_80003es2lan)
3036                 adapter->hw.fc.pause_time = 0xFFFF;
3037         else
3038                 adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME;
3039         adapter->hw.fc.send_xon = TRUE;
3040
3041         /* Set Flow control, use the tunable location if sane */
3042         if ((em_fc_setting >= 0) || (em_fc_setting < 4))
3043                 adapter->hw.fc.requested_mode = em_fc_setting;
3044         else
3045                 adapter->hw.fc.requested_mode = e1000_fc_none;
3046
3047         /* Override - workaround for PCHLAN issue */
3048         if (adapter->hw.mac.type == e1000_pchlan)
3049                 adapter->hw.fc.requested_mode = e1000_fc_rx_pause;
3050
3051         if (e1000_init_hw(&adapter->hw) < 0) {
3052                 device_printf(dev, "Hardware Initialization Failed\n");
3053                 return (EIO);
3054         }
3055
3056         e1000_check_for_link(&adapter->hw);
3057
3058         return (0);
3059 }
3060
3061 /*********************************************************************
3062  *
3063  *  Setup networking device structure and register an interface.
3064  *
3065  **********************************************************************/
3066 static void
3067 em_setup_interface(device_t dev, struct adapter *adapter)
3068 {
3069         struct ifnet   *ifp;
3070
3071         INIT_DEBUGOUT("em_setup_interface: begin");
3072
3073         ifp = adapter->ifp = &adapter->arpcom.ac_if;
3074         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3075         ifp->if_mtu = ETHERMTU;
3076         ifp->if_init =  em_init;
3077         ifp->if_softc = adapter;
3078         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3079         ifp->if_ioctl = em_ioctl;
3080         ifp->if_start = em_start;
3081         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
3082         ifq_set_ready(&ifp->if_snd);
3083
3084         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
3085
3086         ifp->if_capabilities = ifp->if_capenable = 0;
3087
3088 #if __FreeBSD_version >= 800000
3089         /* Multiqueue tx functions */
3090         ifp->if_transmit = em_mq_start;
3091         ifp->if_qflush = em_qflush;
3092         adapter->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &adapter->tx_mtx);
3093 #endif  
3094         if (adapter->hw.mac.type >= e1000_82543) {
3095                 int version_cap;
3096                 version_cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3097                 ifp->if_capabilities |= version_cap;
3098                 ifp->if_capenable |= version_cap;
3099         }
3100
3101 #ifdef NET_TSO
3102         /* Identify TSO capable adapters */
3103         if ((adapter->hw.mac.type > e1000_82544) &&
3104             (adapter->hw.mac.type != e1000_82547))
3105                 ifp->if_capabilities |= IFCAP_TSO4;
3106         /*
3107          * By default only enable on PCI-E, this
3108          * can be overriden by ifconfig.
3109          */
3110         if (adapter->hw.mac.type >= e1000_82571)
3111                 ifp->if_capenable |= IFCAP_TSO4;
3112 #endif
3113         /*
3114          * Tell the upper layer(s) we
3115          * support full VLAN capability
3116          */
3117         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3118         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
3119         ifp->if_capenable |= (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING);
3120
3121         /*
3122         ** Dont turn this on by default, if vlans are
3123         ** created on another pseudo device (eg. lagg)
3124         ** then vlan events are not passed thru, breaking
3125         ** operation, but with HW FILTER off it works. If
3126         ** using vlans directly on the em driver you can
3127         ** enable this and get full hardware tag filtering. 
3128         */
3129         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3130
3131 #ifdef DEVICE_POLLING
3132         ifp->if_capabilities |= IFCAP_POLLING;
3133 #endif
3134
3135         /* Limit WOL to MAGIC, not clear others are used */
3136         if (adapter->wol) {
3137                 ifp->if_capabilities |= IFCAP_WOL_MAGIC;
3138                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3139         }
3140                 
3141         /*
3142          * Specify the media types supported by this adapter and register
3143          * callbacks to update media and link information
3144          */
3145         ifmedia_init(&adapter->media, IFM_IMASK,
3146             em_media_change, em_media_status);
3147         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3148             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3149                 u_char fiber_type = IFM_1000_SX;        /* default type */
3150
3151                 if (adapter->hw.mac.type == e1000_82545)
3152                         fiber_type = IFM_1000_LX;
3153                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3154                             0, NULL);
3155                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3156         } else {
3157                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3158                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3159                             0, NULL);
3160                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3161                             0, NULL);
3162                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3163                             0, NULL);
3164                 if (adapter->hw.phy.type != e1000_phy_ife) {
3165                         ifmedia_add(&adapter->media,
3166                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3167                         ifmedia_add(&adapter->media,
3168                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3169                 }
3170         }
3171         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3172         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3173 }
3174
3175
3176 /*********************************************************************
3177  *
3178  *  Workaround for SmartSpeed on 82541 and 82547 controllers
3179  *
3180  **********************************************************************/
3181 static void
3182 em_smartspeed(struct adapter *adapter)
3183 {
3184         u16 phy_tmp;
3185
3186         if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
3187             adapter->hw.mac.autoneg == 0 ||
3188             (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
3189                 return;
3190
3191         if (adapter->smartspeed == 0) {
3192                 /* If Master/Slave config fault is asserted twice,
3193                  * we assume back-to-back */
3194                 e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
3195                 if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
3196                         return;
3197                 e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
3198                 if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
3199                         e1000_read_phy_reg(&adapter->hw,
3200                             PHY_1000T_CTRL, &phy_tmp);
3201                         if(phy_tmp & CR_1000T_MS_ENABLE) {
3202                                 phy_tmp &= ~CR_1000T_MS_ENABLE;
3203                                 e1000_write_phy_reg(&adapter->hw,
3204                                     PHY_1000T_CTRL, phy_tmp);
3205                                 adapter->smartspeed++;
3206                                 if(adapter->hw.mac.autoneg &&
3207                                    !e1000_copper_link_autoneg(&adapter->hw) &&
3208                                    !e1000_read_phy_reg(&adapter->hw,
3209                                     PHY_CONTROL, &phy_tmp)) {
3210                                         phy_tmp |= (MII_CR_AUTO_NEG_EN |
3211                                                     MII_CR_RESTART_AUTO_NEG);
3212                                         e1000_write_phy_reg(&adapter->hw,
3213                                             PHY_CONTROL, phy_tmp);
3214                                 }
3215                         }
3216                 }
3217                 return;
3218         } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
3219                 /* If still no link, perhaps using 2/3 pair cable */
3220                 e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
3221                 phy_tmp |= CR_1000T_MS_ENABLE;
3222                 e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
3223                 if(adapter->hw.mac.autoneg &&
3224                    !e1000_copper_link_autoneg(&adapter->hw) &&
3225                    !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
3226                         phy_tmp |= (MII_CR_AUTO_NEG_EN |
3227                                     MII_CR_RESTART_AUTO_NEG);
3228                         e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
3229                 }
3230         }
3231         /* Restart process after EM_SMARTSPEED_MAX iterations */
3232         if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
3233                 adapter->smartspeed = 0;
3234 }
3235
3236
3237 /*
3238  * Manage DMA'able memory.
3239  */
3240 static void
3241 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3242 {
3243         if (error)
3244                 return;
3245         *(bus_addr_t *) arg = segs[0].ds_addr;
3246 }
3247
3248 static int
3249 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3250         struct em_dma_alloc *dma, int mapflags)
3251 {
3252         int error;
3253
3254         error = bus_dma_tag_create(NULL,                /* parent */
3255                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3256                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3257                                 BUS_SPACE_MAXADDR,      /* highaddr */
3258                                 NULL, NULL,             /* filter, filterarg */
3259                                 size,                   /* maxsize */
3260                                 1,                      /* nsegments */
3261                                 size,                   /* maxsegsize */
3262                                 0,                      /* flags */
3263                                 &dma->dma_tag);
3264         if (error) {
3265                 device_printf(adapter->dev,
3266                     "%s: bus_dma_tag_create failed: %d\n",
3267                     __func__, error);
3268                 goto fail_0;
3269         }
3270
3271         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3272             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3273         if (error) {
3274                 device_printf(adapter->dev,
3275                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3276                     __func__, (uintmax_t)size, error);
3277                 goto fail_2;
3278         }
3279
3280         dma->dma_paddr = 0;
3281         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3282             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3283         if (error || dma->dma_paddr == 0) {
3284                 device_printf(adapter->dev,
3285                     "%s: bus_dmamap_load failed: %d\n",
3286                     __func__, error);
3287                 goto fail_3;
3288         }
3289
3290         return (0);
3291
3292 fail_3:
3293         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3294 fail_2:
3295         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3296         bus_dma_tag_destroy(dma->dma_tag);
3297 fail_0:
3298         dma->dma_map = NULL;
3299         dma->dma_tag = NULL;
3300
3301         return (error);
3302 }
3303
3304 static void
3305 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3306 {
3307         if (dma->dma_tag == NULL)
3308                 return;
3309         if (dma->dma_map != NULL) {
3310                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3311                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3312                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3313                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3314                 dma->dma_map = NULL;
3315         }
3316         bus_dma_tag_destroy(dma->dma_tag);
3317         dma->dma_tag = NULL;
3318 }
3319
3320
3321 /*********************************************************************
3322  *
3323  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3324  *  the information needed to transmit a packet on the wire.
3325  *
3326  **********************************************************************/
3327 static int
3328 em_allocate_transmit_structures(struct adapter *adapter)
3329 {
3330         device_t dev = adapter->dev;
3331         struct em_buffer *tx_buffer;
3332         int error;
3333
3334         /*
3335          * Create DMA tags for tx descriptors
3336          */
3337         if ((error = bus_dma_tag_create(NULL,           /* parent */
3338                                 1, 0,                   /* alignment, bounds */
3339                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3340                                 BUS_SPACE_MAXADDR,      /* highaddr */
3341                                 NULL, NULL,             /* filter, filterarg */
3342                                 EM_TSO_SIZE,            /* maxsize */
3343                                 EM_MAX_SCATTER,         /* nsegments */
3344                                 EM_TSO_SEG_SIZE,        /* maxsegsize */
3345                                 0,                      /* flags */
3346                                 &adapter->txtag)) != 0) {
3347                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3348                 goto fail;
3349         }
3350
3351         adapter->tx_buffer_area = kmalloc(sizeof(struct em_buffer) *
3352             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3353         if (adapter->tx_buffer_area == NULL) {
3354                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3355                 error = ENOMEM;
3356                 goto fail;
3357         }
3358
3359         /* Create the descriptor buffer dma maps */
3360         for (int i = 0; i < adapter->num_tx_desc; i++) {
3361                 tx_buffer = &adapter->tx_buffer_area[i];
3362                 error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
3363                 if (error != 0) {
3364                         device_printf(dev, "Unable to create TX DMA map\n");
3365                         goto fail;
3366                 }
3367                 tx_buffer->next_eop = -1;
3368         }
3369
3370         return (0);
3371 fail:
3372         em_free_transmit_structures(adapter);
3373         return (error);
3374 }
3375
3376 /*********************************************************************
3377  *
3378  *  (Re)Initialize transmit structures.
3379  *
3380  **********************************************************************/
3381 static void
3382 em_setup_transmit_structures(struct adapter *adapter)
3383 {
3384         struct em_buffer *tx_buffer;
3385
3386         /* Clear the old ring contents */
3387         bzero(adapter->tx_desc_base,
3388             (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3389
3390         /* Free any existing TX buffers */
3391         for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3392                 tx_buffer = &adapter->tx_buffer_area[i];
3393                 bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3394                     BUS_DMASYNC_POSTWRITE);
3395                 bus_dmamap_unload(adapter->txtag, tx_buffer->map);
3396                 m_freem(tx_buffer->m_head);
3397                 tx_buffer->m_head = NULL;
3398                 tx_buffer->next_eop = -1;
3399         }
3400
3401         /* Reset state */
3402         adapter->next_avail_tx_desc = 0;
3403         adapter->next_tx_to_clean = 0;
3404         adapter->num_tx_desc_avail = adapter->num_tx_desc;
3405
3406         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3407             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3408
3409         return;
3410 }
3411
3412 /*********************************************************************
3413  *
3414  *  Enable transmit unit.
3415  *
3416  **********************************************************************/
3417 static void
3418 em_initialize_transmit_unit(struct adapter *adapter)
3419 {
3420         u32     tctl, tarc, tipg = 0;
3421         u64     bus_addr;
3422
3423          INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3424         /* Setup the Base and Length of the Tx Descriptor Ring */
3425         bus_addr = adapter->txdma.dma_paddr;
3426         E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0),
3427             adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3428         E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0),
3429             (u32)(bus_addr >> 32));
3430         E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0),
3431             (u32)bus_addr);
3432         /* Setup the HW Tx Head and Tail descriptor pointers */
3433         E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0);
3434         E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0);
3435
3436         HW_DEBUGOUT2("Base = %x, Length = %x\n",
3437             E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)),
3438             E1000_READ_REG(&adapter->hw, E1000_TDLEN(0)));
3439
3440         /* Set the default values for the Tx Inter Packet Gap timer */
3441         switch (adapter->hw.mac.type) {
3442         case e1000_82542:
3443                 tipg = DEFAULT_82542_TIPG_IPGT;
3444                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3445                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3446                 break;
3447         case e1000_80003es2lan:
3448                 tipg = DEFAULT_82543_TIPG_IPGR1;
3449                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3450                     E1000_TIPG_IPGR2_SHIFT;
3451                 break;
3452         default:
3453                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3454                     (adapter->hw.phy.media_type ==
3455                     e1000_media_type_internal_serdes))
3456                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3457                 else
3458                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3459                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3460                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3461         }
3462
3463         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3464         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3465         if(adapter->hw.mac.type >= e1000_82540)
3466                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3467                     adapter->tx_abs_int_delay.value);
3468
3469         if ((adapter->hw.mac.type == e1000_82571) ||
3470             (adapter->hw.mac.type == e1000_82572)) {
3471                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3472                 tarc |= SPEED_MODE_BIT;
3473                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3474         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3475                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3476                 tarc |= 1;
3477                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3478                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3479                 tarc |= 1;
3480                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3481         }
3482
3483         /* Program the Transmit Control Register */
3484         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3485         tctl &= ~E1000_TCTL_CT;
3486         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3487                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3488
3489         if (adapter->hw.mac.type >= e1000_82571)
3490                 tctl |= E1000_TCTL_MULR;
3491
3492         /* This write will effectively turn on the transmit unit. */
3493         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3494
3495         /* Setup Transmit Descriptor Base Settings */   
3496         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3497
3498         if (adapter->tx_int_delay.value > 0)
3499                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3500 }
3501
3502 /*********************************************************************
3503  *
3504  *  Free all transmit related data structures.
3505  *
3506  **********************************************************************/
3507 static void
3508 em_free_transmit_structures(struct adapter *adapter)
3509 {
3510         struct em_buffer *tx_buffer;
3511
3512         INIT_DEBUGOUT("free_transmit_structures: begin");
3513
3514         if (adapter->tx_buffer_area != NULL) {
3515                 for (int i = 0; i < adapter->num_tx_desc; i++) {
3516                         tx_buffer = &adapter->tx_buffer_area[i];
3517                         if (tx_buffer->m_head != NULL) {
3518                                 bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3519                                     BUS_DMASYNC_POSTWRITE);
3520                                 bus_dmamap_unload(adapter->txtag,
3521                                     tx_buffer->map);
3522                                 m_freem(tx_buffer->m_head);
3523                                 tx_buffer->m_head = NULL;
3524                         } else if (tx_buffer->map != NULL)
3525                                 bus_dmamap_unload(adapter->txtag,
3526                                     tx_buffer->map);
3527                         if (tx_buffer->map != NULL) {
3528                                 bus_dmamap_destroy(adapter->txtag,
3529                                     tx_buffer->map);
3530                                 tx_buffer->map = NULL;
3531                         }
3532                 }
3533         }
3534         if (adapter->tx_buffer_area != NULL) {
3535                 kfree(adapter->tx_buffer_area, M_DEVBUF);
3536                 adapter->tx_buffer_area = NULL;
3537         }
3538         if (adapter->txtag != NULL) {
3539                 bus_dma_tag_destroy(adapter->txtag);
3540                 adapter->txtag = NULL;
3541         }
3542 #if __FreeBSD_version >= 800000
3543         if (adapter->br != NULL)
3544                 buf_ring_free(adapter->br, M_DEVBUF);
3545 #endif
3546 }
3547
3548 /*********************************************************************
3549  *
3550  *  The offload context needs to be set when we transfer the first
3551  *  packet of a particular protocol (TCP/UDP). This routine has been
3552  *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3553  *
3554  *  Added back the old method of keeping the current context type
3555  *  and not setting if unnecessary, as this is reported to be a
3556  *  big performance win.  -jfv
3557  **********************************************************************/
3558 static void
3559 em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3560     u32 *txd_upper, u32 *txd_lower)
3561 {
3562         struct e1000_context_desc *TXD = NULL;
3563         struct em_buffer *tx_buffer;
3564         struct ether_vlan_header *eh;
3565         struct ip *ip = NULL;
3566         struct ip6_hdr *ip6;
3567         int curr_txd, ehdrlen;
3568         u32 cmd, hdr_len, ip_hlen;
3569         u16 etype;
3570         u8 ipproto;
3571
3572
3573         cmd = hdr_len = ipproto = 0;
3574         curr_txd = adapter->next_avail_tx_desc;
3575
3576         /*
3577          * Determine where frame payload starts.
3578          * Jump over vlan headers if already present,
3579          * helpful for QinQ too.
3580          */
3581         eh = mtod(mp, struct ether_vlan_header *);
3582         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3583                 etype = ntohs(eh->evl_proto);
3584                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3585         } else {
3586                 etype = ntohs(eh->evl_encap_proto);
3587                 ehdrlen = ETHER_HDR_LEN;
3588         }
3589
3590         /*
3591          * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3592          * TODO: Support SCTP too when it hits the tree.
3593          */
3594         switch (etype) {
3595         case ETHERTYPE_IP:
3596                 ip = (struct ip *)(mp->m_data + ehdrlen);
3597                 ip_hlen = ip->ip_hl << 2;
3598
3599                 /* Setup of IP header checksum. */
3600                 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3601                         /*
3602                          * Start offset for header checksum calculation.
3603                          * End offset for header checksum calculation.
3604                          * Offset of place to put the checksum.
3605                          */
3606                         TXD = (struct e1000_context_desc *)
3607                             &adapter->tx_desc_base[curr_txd];
3608                         TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3609                         TXD->lower_setup.ip_fields.ipcse =
3610                             htole16(ehdrlen + ip_hlen);
3611                         TXD->lower_setup.ip_fields.ipcso =
3612                             ehdrlen + offsetof(struct ip, ip_sum);
3613                         cmd |= E1000_TXD_CMD_IP;
3614                         *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3615                 }
3616
3617                 if (mp->m_len < ehdrlen + ip_hlen)
3618                         return; /* failure */
3619
3620                 hdr_len = ehdrlen + ip_hlen;
3621                 ipproto = ip->ip_p;
3622
3623                 break;
3624         case ETHERTYPE_IPV6:
3625                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3626                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3627
3628                 if (mp->m_len < ehdrlen + ip_hlen)
3629                         return; /* failure */
3630
3631                 /* IPv6 doesn't have a header checksum. */
3632
3633                 hdr_len = ehdrlen + ip_hlen;
3634                 ipproto = ip6->ip6_nxt;
3635
3636                 break;
3637         default:
3638                 *txd_upper = 0;
3639                 *txd_lower = 0;
3640                 return;
3641         }
3642
3643         switch (ipproto) {
3644         case IPPROTO_TCP:
3645                 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3646                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3647                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3648                         /* no need for context if already set */
3649                         if (adapter->last_hw_offload == CSUM_TCP)
3650                                 return;
3651                         adapter->last_hw_offload = CSUM_TCP;
3652                         /*
3653                          * Start offset for payload checksum calculation.
3654                          * End offset for payload checksum calculation.
3655                          * Offset of place to put the checksum.
3656                          */
3657                         TXD = (struct e1000_context_desc *)
3658                             &adapter->tx_desc_base[curr_txd];
3659                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3660                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3661                         TXD->upper_setup.tcp_fields.tucso =
3662                             hdr_len + offsetof(struct tcphdr, th_sum);
3663                         cmd |= E1000_TXD_CMD_TCP;
3664                 }
3665                 break;
3666         case IPPROTO_UDP:
3667         {
3668                 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3669                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3670                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3671                         /* no need for context if already set */
3672                         if (adapter->last_hw_offload == CSUM_UDP)
3673                                 return;
3674                         adapter->last_hw_offload = CSUM_UDP;
3675                         /*
3676                          * Start offset for header checksum calculation.
3677                          * End offset for header checksum calculation.
3678                          * Offset of place to put the checksum.
3679                          */
3680                         TXD = (struct e1000_context_desc *)
3681                             &adapter->tx_desc_base[curr_txd];
3682                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3683                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3684                         TXD->upper_setup.tcp_fields.tucso =
3685                             hdr_len + offsetof(struct udphdr, uh_sum);
3686                 }
3687                 /* Fall Thru */
3688         }
3689         default:
3690                 break;
3691         }
3692
3693         TXD->tcp_seg_setup.data = htole32(0);
3694         TXD->cmd_and_length =
3695             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3696         tx_buffer = &adapter->tx_buffer_area[curr_txd];
3697         tx_buffer->m_head = NULL;
3698         tx_buffer->next_eop = -1;
3699
3700         if (++curr_txd == adapter->num_tx_desc)
3701                 curr_txd = 0;
3702
3703         adapter->num_tx_desc_avail--;
3704         adapter->next_avail_tx_desc = curr_txd;
3705 }
3706
3707
3708 #ifdef NET_TSO
3709 /**********************************************************************
3710  *
3711  *  Setup work for hardware segmentation offload (TSO)
3712  *
3713  **********************************************************************/
3714 static bool
3715 em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper,
3716    u32 *txd_lower)
3717 {
3718         struct e1000_context_desc *TXD;
3719         struct em_buffer *tx_buffer;
3720         struct ether_vlan_header *eh;
3721         struct ip *ip;
3722         struct ip6_hdr *ip6;
3723         struct tcphdr *th;
3724         int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
3725         u16 etype;
3726
3727         /*
3728          * This function could/should be extended to support IP/IPv6
3729          * fragmentation as well.  But as they say, one step at a time.
3730          */
3731
3732         /*
3733          * Determine where frame payload starts.
3734          * Jump over vlan headers if already present,
3735          * helpful for QinQ too.
3736          */
3737         eh = mtod(mp, struct ether_vlan_header *);
3738         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3739                 etype = ntohs(eh->evl_proto);
3740                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3741         } else {
3742                 etype = ntohs(eh->evl_encap_proto);
3743                 ehdrlen = ETHER_HDR_LEN;
3744         }
3745
3746         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3747         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3748                 return FALSE;   /* -1 */
3749
3750         /*
3751          * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
3752          * TODO: Support SCTP too when it hits the tree.
3753          */
3754         switch (etype) {
3755         case ETHERTYPE_IP:
3756                 isip6 = 0;
3757                 ip = (struct ip *)(mp->m_data + ehdrlen);
3758                 if (ip->ip_p != IPPROTO_TCP)
3759                         return FALSE;   /* 0 */
3760                 ip->ip_len = 0;
3761                 ip->ip_sum = 0;
3762                 ip_hlen = ip->ip_hl << 2;
3763                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3764                         return FALSE;   /* -1 */
3765                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3766 #if 1
3767                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3768                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3769 #else
3770                 th->th_sum = mp->m_pkthdr.csum_data;
3771 #endif
3772                 break;
3773         case ETHERTYPE_IPV6:
3774                 isip6 = 1;
3775                 return FALSE;                   /* Not supported yet. */
3776                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3777                 if (ip6->ip6_nxt != IPPROTO_TCP)
3778                         return FALSE;   /* 0 */
3779                 ip6->ip6_plen = 0;
3780                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3781                 if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3782                         return FALSE;   /* -1 */
3783                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3784 #if 0
3785                 th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
3786                     htons(IPPROTO_TCP));        /* XXX: function notyet. */
3787 #else
3788                 th->th_sum = mp->m_pkthdr.csum_data;
3789 #endif
3790                 break;
3791         default:
3792                 return FALSE;
3793         }
3794         hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3795
3796         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3797                       E1000_TXD_DTYP_D |        /* Data descr type */
3798                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3799
3800         /* IP and/or TCP header checksum calculation and insertion. */
3801         *txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3802                       E1000_TXD_POPTS_TXSM) << 8;
3803
3804         curr_txd = adapter->next_avail_tx_desc;
3805         tx_buffer = &adapter->tx_buffer_area[curr_txd];
3806         TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3807
3808         /* IPv6 doesn't have a header checksum. */
3809         if (!isip6) {
3810                 /*
3811                  * Start offset for header checksum calculation.
3812                  * End offset for header checksum calculation.
3813                  * Offset of place to put the checksum.
3814                  */
3815                 TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3816                 TXD->lower_setup.ip_fields.ipcse =
3817                     htole16(ehdrlen + ip_hlen - 1);
3818                 TXD->lower_setup.ip_fields.ipcso =
3819                     ehdrlen + offsetof(struct ip, ip_sum);
3820         }
3821         /*
3822          * Start offset for payload checksum calculation.
3823          * End offset for payload checksum calculation.
3824          * Offset of place to put the checksum.
3825          */
3826         TXD->upper_setup.tcp_fields.tucss =
3827             ehdrlen + ip_hlen;
3828         TXD->upper_setup.tcp_fields.tucse = 0;
3829         TXD->upper_setup.tcp_fields.tucso =
3830             ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3831         /*
3832          * Payload size per packet w/o any headers.
3833          * Length of all headers up to payload.
3834          */
3835         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3836         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3837
3838         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3839                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3840                                 E1000_TXD_CMD_TSE |     /* TSE context */
3841                                 (isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3842                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3843                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3844
3845         tx_buffer->m_head = NULL;
3846         tx_buffer->next_eop = -1;
3847
3848         if (++curr_txd == adapter->num_tx_desc)
3849                 curr_txd = 0;
3850
3851         adapter->num_tx_desc_avail--;
3852         adapter->next_avail_tx_desc = curr_txd;
3853         adapter->tx_tso = TRUE;
3854
3855         return TRUE;
3856 }
3857
3858 #endif
3859
3860 /**********************************************************************
3861  *
3862  *  Examine each tx_buffer in the used queue. If the hardware is done
3863  *  processing the packet then free associated resources. The
3864  *  tx_buffer is put back on the free queue.
3865  *
3866  **********************************************************************/
3867 static void
3868 em_txeof(struct adapter *adapter)
3869 {
3870         int first, last, done, num_avail;
3871         struct em_buffer *tx_buffer;
3872         struct e1000_tx_desc   *tx_desc, *eop_desc;
3873         struct ifnet   *ifp = adapter->ifp;
3874
3875         EM_TX_LOCK_ASSERT(adapter);
3876
3877         if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3878                 return;
3879
3880         num_avail = adapter->num_tx_desc_avail;
3881         first = adapter->next_tx_to_clean;
3882         tx_desc = &adapter->tx_desc_base[first];
3883         tx_buffer = &adapter->tx_buffer_area[first];
3884         last = tx_buffer->next_eop;
3885         eop_desc = &adapter->tx_desc_base[last];
3886
3887         /*
3888          * What this does is get the index of the
3889          * first descriptor AFTER the EOP of the 
3890          * first packet, that way we can do the
3891          * simple comparison on the inner while loop.
3892          */
3893         if (++last == adapter->num_tx_desc)
3894                 last = 0;
3895         done = last;
3896
3897         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3898             BUS_DMASYNC_POSTREAD);
3899
3900         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3901                 /* We clean the range of the packet */
3902                 while (first != done) {
3903                         tx_desc->upper.data = 0;
3904                         tx_desc->lower.data = 0;
3905                         tx_desc->buffer_addr = 0;
3906                         ++num_avail;
3907
3908                         if (tx_buffer->m_head) {
3909                                 ifp->if_opackets++;
3910                                 bus_dmamap_sync(adapter->txtag,
3911                                     tx_buffer->map,
3912                                     BUS_DMASYNC_POSTWRITE);
3913                                 bus_dmamap_unload(adapter->txtag,
3914                                     tx_buffer->map);
3915
3916                                 m_freem(tx_buffer->m_head);
3917                                 tx_buffer->m_head = NULL;
3918                         }
3919                         tx_buffer->next_eop = -1;
3920                         adapter->watchdog_time = ticks;
3921
3922                         if (++first == adapter->num_tx_desc)
3923                                 first = 0;
3924
3925                         tx_buffer = &adapter->tx_buffer_area[first];
3926                         tx_desc = &adapter->tx_desc_base[first];
3927                 }
3928                 /* See if we can continue to the next packet */
3929                 last = tx_buffer->next_eop;
3930                 if (last != -1) {
3931                         eop_desc = &adapter->tx_desc_base[last];
3932                         /* Get new done point */
3933                         if (++last == adapter->num_tx_desc) last = 0;
3934                         done = last;
3935                 } else
3936                         break;
3937         }
3938         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3939             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3940
3941         adapter->next_tx_to_clean = first;