1 /*
2  * Copyright (c) 2004
3  *      Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
4  *
5  * Copyright (c) 1997, 1998-2003
6  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Bill Paul.
19  * 4. Neither the name of the author nor the names of any co-contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $
36  * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.99 2008/10/30 11:27:40 sephe Exp $
37  */
38
39 /*
40  * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
41  *
42  * Written by Bill Paul <wpaul@windriver.com>
43  * Senior Networking Software Engineer
44  * Wind River Systems
45  */
46
47 /*
48  * This driver is designed to support RealTek's next generation of
49  * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
50  * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
51  * the RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
52  *
53  * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
54  * with the older 8139 family; however, it also supports a special
55  * C+ mode of operation that provides several new performance-enhancing
56  * features. These include:
57  *
58  *      o Descriptor based DMA mechanism. Each descriptor represents
59  *        a single packet fragment. Data buffers may be aligned on
60  *        any byte boundary.
61  *
62  *      o 64-bit DMA
63  *
64  *      o TCP/IP checksum offload for both RX and TX
65  *
66  *      o High and normal priority transmit DMA rings
67  *
68  *      o VLAN tag insertion and extraction
69  *
70  *      o TCP large send (segmentation offload)
71  *
72  * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
73  * programming API is fairly straightforward. The RX filtering, EEPROM
74  * access and PHY access are the same as they are on the older 8139 series
75  * chips.
76  *
77  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
78  * same programming API and feature set as the 8139C+ with the following
79  * differences and additions:
80  *
81  *      o 1000Mbps mode
82  *
83  *      o Jumbo frames
84  *
85  *      o GMII and TBI ports/registers for interfacing with copper
86  *        or fiber PHYs
87  *
88  *      o RX and TX DMA rings can have up to 1024 descriptors
89  *        (the 8139C+ allows a maximum of 64)
90  *
91  *      o Slight differences in register layout from the 8139C+
92  *
93  * The TX start and timer interrupt registers are at different locations
94  * on the 8169 than they are on the 8139C+. Also, the status word in the
95  * RX descriptor has a slightly different bit layout. The 8169 does not
96  * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
97  * copper gigE PHY.
98  *
99  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
100  * (the 'S' stands for 'single-chip'). These devices have the same
101  * programming API as the older 8169, but also have some vendor-specific
102  * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
103  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
104  * 
105  * This driver takes advantage of the RX and TX checksum offload and
106  * VLAN tag insertion/extraction features. It also implements TX
107  * interrupt moderation using the timer interrupt registers, which
108  * significantly reduces TX interrupt load. There is also support
109  * for jumbo frames; however, the 8169/8169S/8110S cannot transmit
110  * jumbo frames larger than 7440 bytes, so the maximum MTU possible
111  * with this driver is 7422 bytes.
112  */
113
114 #define _IP_VHL
115
116 #include "opt_polling.h"
117
118 #include <sys/param.h>
119 #include <sys/bus.h>
120 #include <sys/endian.h>
121 #include <sys/kernel.h>
122 #include <sys/in_cksum.h>
123 #include <sys/interrupt.h>
124 #include <sys/malloc.h>
125 #include <sys/mbuf.h>
126 #include <sys/rman.h>
127 #include <sys/serialize.h>
128 #include <sys/socket.h>
129 #include <sys/sockio.h>
130 #include <sys/sysctl.h>
131
132 #include <net/bpf.h>
133 #include <net/ethernet.h>
134 #include <net/if.h>
135 #include <net/ifq_var.h>
136 #include <net/if_arp.h>
137 #include <net/if_dl.h>
138 #include <net/if_media.h>
139 #include <net/if_types.h>
140 #include <net/vlan/if_vlan_var.h>
141 #include <net/vlan/if_vlan_ether.h>
142
143 #include <netinet/ip.h>
144
145 #include <dev/netif/mii_layer/mii.h>
146 #include <dev/netif/mii_layer/miivar.h>
147
148 #include <bus/pci/pcidevs.h>
149 #include <bus/pci/pcireg.h>
150 #include <bus/pci/pcivar.h>
151
152 /* "device miibus" required.  See GENERIC if you get errors here. */
153 #include "miibus_if.h"
154
155 #include <dev/netif/re/if_rereg.h>
156 #include <dev/netif/re/if_revar.h>
157
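/* Transmit checksum offloads the chip can handle in hardware. */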
158 #define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
159
160 /*
161  * Various supported device vendors/types and their names.
162  */
163 static const struct re_type {
164         uint16_t        re_vid;
165         uint16_t        re_did;
166         const char      *re_name;
167 } re_devs[] = {
168         { PCI_VENDOR_DLINK, PCI_PRODUCT_DLINK_DGE528T,
169           "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
170
171         { PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8139,
172           "RealTek 8139C+ 10/100BaseTX" },
173
174         { PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8101E,
175           "RealTek 810x PCIe 10/100baseTX" },
176
177         { PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8168,
178           "RealTek 8111/8168 PCIe Gigabit Ethernet" },
179
180         { PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8169,
181           "RealTek 8110/8169 Gigabit Ethernet" },
182
183         { PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8169SC,
184           "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
185
186         { PCI_VENDOR_COREGA, PCI_PRODUCT_COREGA_CG_LAPCIGT,
187           "Corega CG-LAPCIGT Gigabit Ethernet" },
188
189         { PCI_VENDOR_LINKSYS, PCI_PRODUCT_LINKSYS_EG1032,
190           "Linksys EG1032 Gigabit Ethernet" },
191
192         { PCI_VENDOR_USR2, PCI_PRODUCT_USR2_997902,
193           "US Robotics 997902 Gigabit Ethernet" },
194
195         { PCI_VENDOR_TTTECH, PCI_PRODUCT_TTTECH_MC322,
196           "TTTech MC322 Gigabit Ethernet" },
197
198         { 0, 0, NULL }
199 };
200
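/*
 * Per-revision chip properties: hardware revision ID (taken from the
 * TXCFG register), MAC version, maximum supported MTU and capability
 * flags (RE_C_*).
 */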
201 static const struct re_hwrev re_hwrevs[] = {
202         { RE_HWREV_8139CPLUS,   RE_MACVER_UNKN,         ETHERMTU,
203           RE_C_HWCSUM | RE_C_8139CP | RE_C_FASTE },
204
205         { RE_HWREV_8169,        RE_MACVER_UNKN,         ETHERMTU,
206           RE_C_HWCSUM | RE_C_8169 },
207
208         { RE_HWREV_8110S,       RE_MACVER_03,           RE_MTU_6K,
209           RE_C_HWCSUM | RE_C_8169 },
210
211         { RE_HWREV_8169S,       RE_MACVER_03,           RE_MTU_6K,
212           RE_C_HWCSUM | RE_C_8169 },
213
214         { RE_HWREV_8169SB,      RE_MACVER_04,           RE_MTU_6K,
215           RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_8169 },
216
217         { RE_HWREV_8169SC1,     RE_MACVER_05,           RE_MTU_6K,
218           RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_8169 },
219
220         { RE_HWREV_8169SC2,     RE_MACVER_06,           RE_MTU_6K,
221           RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_8169 },
222
223         { RE_HWREV_8168B1,      RE_MACVER_21,           RE_MTU_6K,
224           RE_C_HWIM | RE_C_HWCSUM | RE_C_PHYPMGT },
225
226         { RE_HWREV_8168B2,      RE_MACVER_23,           RE_MTU_6K,
227           RE_C_HWIM | RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_AUTOPAD },
228
229         { RE_HWREV_8168B3,      RE_MACVER_23,           RE_MTU_6K,
230           RE_C_HWIM | RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_AUTOPAD },
231
232         { RE_HWREV_8168C,       RE_MACVER_29,           RE_MTU_6K,
233           RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT |
234           RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX },
235
236         { RE_HWREV_8168CP,      RE_MACVER_2B,           RE_MTU_6K,
237           RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT |
238           RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX },
239
240         { RE_HWREV_8168D,       RE_MACVER_2A,           RE_MTU_9K,
241           RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT |
242           RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX },
243
244         { RE_HWREV_8168DP,      RE_MACVER_2D,           RE_MTU_9K,
245           RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT |
246           RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX },
247
248         { RE_HWREV_8168E,       RE_MACVER_UNKN,         RE_MTU_9K,
249           RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT |
250           RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX },
251
252         { RE_HWREV_8168F,       RE_MACVER_UNKN,         RE_MTU_9K,
253           RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT |
254           RE_C_AUTOPAD | RE_C_CONTIGRX | RE_C_STOP_RXTX },
255
256         { RE_HWREV_8100E,       RE_MACVER_UNKN,         ETHERMTU,
257           RE_C_HWCSUM | RE_C_FASTE },
258
259         { RE_HWREV_8101E1,      RE_MACVER_16,           ETHERMTU,
260           RE_C_HWCSUM | RE_C_FASTE },
261
262         { RE_HWREV_8101E2,      RE_MACVER_16,           ETHERMTU,
263           RE_C_HWCSUM | RE_C_FASTE },
264
265         { RE_HWREV_8102E,       RE_MACVER_15,           ETHERMTU,
266           RE_C_HWCSUM | RE_C_MAC2 | RE_C_AUTOPAD | RE_C_STOP_RXTX |
267           RE_C_FASTE },
268
269         { RE_HWREV_8102EL,      RE_MACVER_15,           ETHERMTU,
270           RE_C_HWCSUM | RE_C_MAC2 | RE_C_AUTOPAD | RE_C_STOP_RXTX |
271           RE_C_FASTE },
272
273         { RE_HWREV_NULL, 0, 0, 0 }
274 };
275
276 static int      re_probe(device_t);
277 static int      re_attach(device_t);
278 static int      re_detach(device_t);
279 static int      re_suspend(device_t);
280 static int      re_resume(device_t);
281 static void     re_shutdown(device_t);
282
283 static int      re_allocmem(device_t);
284 static void     re_freemem(device_t);
285 static void     re_freebufmem(struct re_softc *, int, int);
286 static int      re_encap(struct re_softc *, struct mbuf **, int *);
287 static int      re_newbuf_std(struct re_softc *, int, int);
288 static int      re_newbuf_jumbo(struct re_softc *, int, int);
289 static void     re_setup_rxdesc(struct re_softc *, int);
290 static int      re_rx_list_init(struct re_softc *);
291 static int      re_tx_list_init(struct re_softc *);
292 static int      re_rxeof(struct re_softc *);
293 static int      re_txeof(struct re_softc *);
294 static int      re_tx_collect(struct re_softc *);
295 static void     re_intr(void *);
296 static void     re_tick(void *);
297 static void     re_tick_serialized(void *);
298
299 static void     re_start(struct ifnet *);
300 static int      re_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
301 static void     re_init(void *);
302 static void     re_stop(struct re_softc *);
303 static void     re_watchdog(struct ifnet *);
304 static int      re_ifmedia_upd(struct ifnet *);
305 static void     re_ifmedia_sts(struct ifnet *, struct ifmediareq *);
306
307 static void     re_eeprom_putbyte(struct re_softc *, int);
308 static void     re_eeprom_getword(struct re_softc *, int, u_int16_t *);
309 static void     re_read_eeprom(struct re_softc *, caddr_t, int, int);
310 static void     re_get_eewidth(struct re_softc *);
311
312 static int      re_gmii_readreg(device_t, int, int);
313 static int      re_gmii_writereg(device_t, int, int, int);
314
315 static int      re_miibus_readreg(device_t, int, int);
316 static int      re_miibus_writereg(device_t, int, int, int);
317 static void     re_miibus_statchg(device_t);
318
319 static void     re_setmulti(struct re_softc *);
320 static void     re_reset(struct re_softc *, int);
321 static void     re_get_eaddr(struct re_softc *, uint8_t *);
322
323 static void     re_setup_hw_im(struct re_softc *);
324 static void     re_setup_sim_im(struct re_softc *);
325 static void     re_disable_hw_im(struct re_softc *);
326 static void     re_disable_sim_im(struct re_softc *);
327 static void     re_config_imtype(struct re_softc *, int);
328 static void     re_setup_intr(struct re_softc *, int, int);
329
330 static int      re_sysctl_hwtime(SYSCTL_HANDLER_ARGS, int *);
331 static int      re_sysctl_rxtime(SYSCTL_HANDLER_ARGS);
332 static int      re_sysctl_txtime(SYSCTL_HANDLER_ARGS);
333 static int      re_sysctl_simtime(SYSCTL_HANDLER_ARGS);
334 static int      re_sysctl_imtype(SYSCTL_HANDLER_ARGS);
335
336 static int      re_jpool_alloc(struct re_softc *);
337 static void     re_jpool_free(struct re_softc *);
338 static struct re_jbuf *re_jbuf_alloc(struct re_softc *);
339 static void     re_jbuf_free(void *);
340 static void     re_jbuf_ref(void *);
341
342 #ifdef RE_DIAG
343 static int      re_diag(struct re_softc *);
344 #endif
345
346 #ifdef DEVICE_POLLING
347 static void     re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count);
348 #endif
349
350 static device_method_t re_methods[] = {
351         /* Device interface */
352         DEVMETHOD(device_probe,         re_probe),
353         DEVMETHOD(device_attach,        re_attach),
354         DEVMETHOD(device_detach,        re_detach),
355         DEVMETHOD(device_suspend,       re_suspend),
356         DEVMETHOD(device_resume,        re_resume),
357         DEVMETHOD(device_shutdown,      re_shutdown),
358
359         /* bus interface */
360         DEVMETHOD(bus_print_child,      bus_generic_print_child),
361         DEVMETHOD(bus_driver_added,     bus_generic_driver_added),
362
363         /* MII interface */
364         DEVMETHOD(miibus_readreg,       re_miibus_readreg),
365         DEVMETHOD(miibus_writereg,      re_miibus_writereg),
366         DEVMETHOD(miibus_statchg,       re_miibus_statchg),
367
368         { 0, 0 }
369 };
370
371 static driver_t re_driver = {
372         "re",
373         re_methods,
374         sizeof(struct re_softc)
375 };
376
377 static devclass_t re_devclass;
378
379 DECLARE_DUMMY_MODULE(if_re);
380 MODULE_DEPEND(if_re, miibus, 1, 1, 1);
381 DRIVER_MODULE(if_re, pci, re_driver, re_devclass, 0, 0);
382 DRIVER_MODULE(if_re, cardbus, re_driver, re_devclass, 0, 0);
383 DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
384
385 static int      re_rx_desc_count = RE_RX_DESC_CNT_DEF;
386 static int      re_tx_desc_count = RE_TX_DESC_CNT_DEF;
387
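/*
 * Both descriptor counts may be overridden from the boot loader, e.g.
 * in loader.conf:
 *
 *      hw.re.rx_desc_count="256"
 *      hw.re.tx_desc_count="256"
 *
 * Values above the per-chip maximum are clamped in re_attach().
 */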
388 TUNABLE_INT("hw.re.rx_desc_count", &re_rx_desc_count);
389 TUNABLE_INT("hw.re.tx_desc_count", &re_tx_desc_count);
390
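/*
 * Helpers for bit-banging the serial EEPROM: set or clear individual
 * control bits in the EEPROM command register (RE_EECMD).
 */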
391 #define EE_SET(x)       \
392         CSR_WRITE_1(sc, RE_EECMD, CSR_READ_1(sc, RE_EECMD) | (x))
393
394 #define EE_CLR(x)       \
395         CSR_WRITE_1(sc, RE_EECMD, CSR_READ_1(sc, RE_EECMD) & ~(x))
396
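/*
 * Drop a partially reassembled RX packet (the re_head/re_tail mbuf
 * chain used when a received frame spans multiple descriptors).
 */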
397 static __inline void
398 re_free_rxchain(struct re_softc *sc)
399 {
400         if (sc->re_head != NULL) {
401                 m_freem(sc->re_head);
402                 sc->re_head = sc->re_tail = NULL;
403         }
404 }
405
406 /*
407  * Send a read command and address to the EEPROM.
408  */
409 static void
410 re_eeprom_putbyte(struct re_softc *sc, int addr)
411 {
412         int d, i;
413
414         d = addr | (RE_9346_READ << sc->re_eewidth);
415
416         /*
417          * Feed in each bit and strobe the clock.
418          */
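        /*
         * Bits are shifted out MSB first: the 93C46-style READ opcode
         * followed by the word address (re_eewidth bits wide, i.e. 6
         * or 8 depending on the EEPROM that was detected).
         */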
419         for (i = 1 << (sc->re_eewidth + 3); i; i >>= 1) {
420                 if (d & i)
421                         EE_SET(RE_EE_DATAIN);
422                 else
423                         EE_CLR(RE_EE_DATAIN);
424                 DELAY(100);
425                 EE_SET(RE_EE_CLK);
426                 DELAY(150);
427                 EE_CLR(RE_EE_CLK);
428                 DELAY(100);
429         }
430 }
431
432 /*
433  * Read a word of data stored in the EEPROM at address 'addr.'
434  */
435 static void
436 re_eeprom_getword(struct re_softc *sc, int addr, uint16_t *dest)
437 {
438         int i;
439         uint16_t word = 0;
440
441         /*
442          * Send address of word we want to read.
443          */
444         re_eeprom_putbyte(sc, addr);
445
446         /*
447          * Start reading bits from EEPROM.
448          */
449         for (i = 0x8000; i != 0; i >>= 1) {
450                 EE_SET(RE_EE_CLK);
451                 DELAY(100);
452                 if (CSR_READ_1(sc, RE_EECMD) & RE_EE_DATAOUT)
453                         word |= i;
454                 EE_CLR(RE_EE_CLK);
455                 DELAY(100);
456         }
457
458         *dest = word;
459 }
460
461 /*
462  * Read a sequence of words from the EEPROM.
463  */
464 static void
465 re_read_eeprom(struct re_softc *sc, caddr_t dest, int off, int cnt)
466 {
467         int i;
468         uint16_t word = 0, *ptr;
469
470         CSR_SETBIT_1(sc, RE_EECMD, RE_EEMODE_PROGRAM);
471         DELAY(100);
472
473         for (i = 0; i < cnt; i++) {
474                 CSR_SETBIT_1(sc, RE_EECMD, RE_EE_SEL);
475                 re_eeprom_getword(sc, off + i, &word);
476                 CSR_CLRBIT_1(sc, RE_EECMD, RE_EE_SEL);
477                 ptr = (uint16_t *)(dest + (i * 2));
478                 *ptr = word;
479         }
480
481         CSR_CLRBIT_1(sc, RE_EECMD, RE_EEMODE_PROGRAM);
482 }
483
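/*
 * Guess the EEPROM address width: start with 6 address bits (93C46
 * style) and read word 0; if it does not contain the expected RealTek
 * ID (0x8129), assume an 8-bit address width (larger 93C56/66-style
 * parts) instead.
 */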
484 static void
485 re_get_eewidth(struct re_softc *sc)
486 {
487         uint16_t re_did = 0;
488
489         sc->re_eewidth = 6;
490         re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
491         if (re_did != 0x8129)
492                 sc->re_eewidth = 8;
493 }
494
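/*
 * GMII PHY access on the 8169 family goes through the PHYAR register:
 * the PHY register number is placed in bits 20:16 and the data in bits
 * 15:0.  A read is started by writing the register number and polling
 * until the chip sets RE_PHYAR_BUSY; a write sets RE_PHYAR_BUSY and
 * polls until the chip clears it.
 */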
495 static int
496 re_gmii_readreg(device_t dev, int phy, int reg)
497 {
498         struct re_softc *sc = device_get_softc(dev);
499         u_int32_t rval;
500         int i;
501
502         if (phy != 1)
503                 return(0);
504
505         /* Let the rgephy driver read the GMEDIASTAT register */
506
507         if (reg == RE_GMEDIASTAT)
508                 return(CSR_READ_1(sc, RE_GMEDIASTAT));
509
510         CSR_WRITE_4(sc, RE_PHYAR, reg << 16);
511         DELAY(1000);
512
513         for (i = 0; i < RE_TIMEOUT; i++) {
514                 rval = CSR_READ_4(sc, RE_PHYAR);
515                 if (rval & RE_PHYAR_BUSY)
516                         break;
517                 DELAY(100);
518         }
519
520         if (i == RE_TIMEOUT) {
521                 device_printf(dev, "PHY read failed\n");
522                 return(0);
523         }
524
525         return(rval & RE_PHYAR_PHYDATA);
526 }
527
528 static int
529 re_gmii_writereg(device_t dev, int phy, int reg, int data)
530 {
531         struct re_softc *sc = device_get_softc(dev);
532         uint32_t rval;
533         int i;
534
535         CSR_WRITE_4(sc, RE_PHYAR,
536                     (reg << 16) | (data & RE_PHYAR_PHYDATA) | RE_PHYAR_BUSY);
537         DELAY(1000);
538
539         for (i = 0; i < RE_TIMEOUT; i++) {
540                 rval = CSR_READ_4(sc, RE_PHYAR);
541                 if ((rval & RE_PHYAR_BUSY) == 0)
542                         break;
543                 DELAY(100);
544         }
545
546         if (i == RE_TIMEOUT)
547                 device_printf(dev, "PHY write failed\n");
548
549         return(0);
550 }
551
552 static int
553 re_miibus_readreg(device_t dev, int phy, int reg)
554 {
555         struct re_softc *sc = device_get_softc(dev);
556         uint16_t rval = 0;
557         uint16_t re8139_reg = 0;
558
559         if (!RE_IS_8139CP(sc)) {
560                 rval = re_gmii_readreg(dev, phy, reg);
561                 return(rval);
562         }
563
564         /* Pretend the internal PHY is only at address 0 */
565         if (phy)
566                 return(0);
567
568         switch(reg) {
569         case MII_BMCR:
570                 re8139_reg = RE_BMCR;
571                 break;
572         case MII_BMSR:
573                 re8139_reg = RE_BMSR;
574                 break;
575         case MII_ANAR:
576                 re8139_reg = RE_ANAR;
577                 break;
578         case MII_ANER:
579                 re8139_reg = RE_ANER;
580                 break;
581         case MII_ANLPAR:
582                 re8139_reg = RE_LPAR;
583                 break;
584         case MII_PHYIDR1:
585         case MII_PHYIDR2:
586                 return(0);
587         /*
588          * Allow the rlphy driver to read the media status
589          * register. If we have a link partner which does not
590          * support NWAY, this is the register which will tell
591          * us the results of parallel detection.
592          */
593         case RE_MEDIASTAT:
594                 return(CSR_READ_1(sc, RE_MEDIASTAT));
595         default:
596                 device_printf(dev, "bad phy register\n");
597                 return(0);
598         }
599         rval = CSR_READ_2(sc, re8139_reg);
600         if (re8139_reg == RE_BMCR) {
601                 /* 8139C+ has different bit layout. */
602                 rval &= ~(BMCR_LOOP | BMCR_ISO);
603         }
604         return(rval);
605 }
606
607 static int
608 re_miibus_writereg(device_t dev, int phy, int reg, int data)
609 {
610         struct re_softc *sc = device_get_softc(dev);
611         u_int16_t re8139_reg = 0;
612
613         if (!RE_IS_8139CP(sc))
614                 return(re_gmii_writereg(dev, phy, reg, data));
615
616         /* Pretend the internal PHY is only at address 0 */
617         if (phy)
618                 return(0);
619
620         switch(reg) {
621         case MII_BMCR:
622                 re8139_reg = RE_BMCR;
623                 /* 8139C+ has different bit layout. */
624                 data &= ~(BMCR_LOOP | BMCR_ISO);
625                 break;
626         case MII_BMSR:
627                 re8139_reg = RE_BMSR;
628                 break;
629         case MII_ANAR:
630                 re8139_reg = RE_ANAR;
631                 break;
632         case MII_ANER:
633                 re8139_reg = RE_ANER;
634                 break;
635         case MII_ANLPAR:
636                 re8139_reg = RE_LPAR;
637                 break;
638         case MII_PHYIDR1:
639         case MII_PHYIDR2:
640                 return(0);
641         default:
642                 device_printf(dev, "bad phy register\n");
643                 return(0);
644         }
645         CSR_WRITE_2(sc, re8139_reg, data);
646         return(0);
647 }
648
649 static void
650 re_miibus_statchg(device_t dev)
651 {
652 }
653
654 /*
655  * Program the 64-bit multicast hash filter.
656  */
657 static void
658 re_setmulti(struct re_softc *sc)
659 {
660         struct ifnet *ifp = &sc->arpcom.ac_if;
661         int h = 0;
662         uint32_t hashes[2] = { 0, 0 };
663         struct ifmultiaddr *ifma;
664         uint32_t rxfilt;
665         int mcnt = 0;
666
667         rxfilt = CSR_READ_4(sc, RE_RXCFG);
668
669         /* Set the individual bit to receive frames for this host only. */
670         rxfilt |= RE_RXCFG_RX_INDIV;
671         /* Set capture broadcast bit to capture broadcast frames. */
672         rxfilt |= RE_RXCFG_RX_BROAD;
673
674         rxfilt &= ~(RE_RXCFG_RX_ALLPHYS | RE_RXCFG_RX_MULTI);
675         if ((ifp->if_flags & IFF_ALLMULTI) || (ifp->if_flags & IFF_PROMISC)) {
676                 rxfilt |= RE_RXCFG_RX_MULTI;
677
678                 /* If we want promiscuous mode, set the allframes bit. */
679                 if (ifp->if_flags & IFF_PROMISC)
680                         rxfilt |= RE_RXCFG_RX_ALLPHYS;
681
682                 CSR_WRITE_4(sc, RE_RXCFG, rxfilt);
683                 CSR_WRITE_4(sc, RE_MAR0, 0xFFFFFFFF);
684                 CSR_WRITE_4(sc, RE_MAR4, 0xFFFFFFFF);
685                 return;
686         }
687
688         /* first, zot all the existing hash bits */
689         CSR_WRITE_4(sc, RE_MAR0, 0);
690         CSR_WRITE_4(sc, RE_MAR4, 0);
691
692         /* now program new ones */
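        /*
         * The upper 6 bits of the big-endian CRC32 of each multicast
         * address select one of the 64 hash-filter bits spread across
         * the two 32-bit MAR registers.
         */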
693         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
694                 if (ifma->ifma_addr->sa_family != AF_LINK)
695                         continue;
696                 h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
697                     ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
698                 if (h < 32)
699                         hashes[0] |= (1 << h);
700                 else
701                         hashes[1] |= (1 << (h - 32));
702                 mcnt++;
703         }
704
705         if (mcnt)
706                 rxfilt |= RE_RXCFG_RX_MULTI;
707         else
708                 rxfilt &= ~RE_RXCFG_RX_MULTI;
709
710         CSR_WRITE_4(sc, RE_RXCFG, rxfilt);
711
712         /*
713          * For some unfathomable reason, RealTek decided to reverse
714          * the order of the multicast hash registers in the PCI Express
715          * parts. This means we have to write the hash pattern in reverse
716          * order for those devices.
717          */
718         if (sc->re_caps & RE_C_PCIE) {
719                 CSR_WRITE_4(sc, RE_MAR0, bswap32(hashes[1]));
720                 CSR_WRITE_4(sc, RE_MAR4, bswap32(hashes[0]));
721         } else {
722                 CSR_WRITE_4(sc, RE_MAR0, hashes[0]);
723                 CSR_WRITE_4(sc, RE_MAR4, hashes[1]);
724         }
725 }
726
727 static void
728 re_reset(struct re_softc *sc, int running)
729 {
730         int i;
731
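        /*
         * Chips flagged RE_C_STOP_RXTX are asked to stop their RX/TX
         * DMA engines cleanly before the software reset is issued.
         */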
732         if ((sc->re_caps & RE_C_STOP_RXTX) && running) {
733                 CSR_WRITE_1(sc, RE_COMMAND,
734                             RE_CMD_STOPREQ | RE_CMD_TX_ENB | RE_CMD_RX_ENB);
735                 DELAY(100);
736         }
737
738         CSR_WRITE_1(sc, RE_COMMAND, RE_CMD_RESET);
739
740         for (i = 0; i < RE_TIMEOUT; i++) {
741                 DELAY(10);
742                 if ((CSR_READ_1(sc, RE_COMMAND) & RE_CMD_RESET) == 0)
743                         break;
744         }
745         if (i == RE_TIMEOUT)
746                 if_printf(&sc->arpcom.ac_if, "reset never completed!\n");
747 }
748
749 #ifdef RE_DIAG
750 /*
751  * The following routine is designed to test for a defect on some
752  * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
753  * lines connected to the bus, however for a 32-bit only card, they
754  * should be pulled high. The result of this defect is that the
755  * NIC will not work right if you plug it into a 64-bit slot: DMA
756  * operations will be done with 64-bit transfers, which will fail
757  * because the 64-bit data lines aren't connected.
758  *
759  * There's no way to work around this (short of taking a soldering
760  * iron to the board); however, we can detect it. The method we use
761  * here is to put the NIC into digital loopback mode, set the receiver
762  * to promiscuous mode, and then try to send a frame. We then compare
763  * the frame data we sent to what was received. If the data matches,
764  * then the NIC is working correctly, otherwise we know the user has
765  * a defective NIC which has been mistakenly plugged into a 64-bit PCI
766  * slot. In the latter case, there's no way the NIC can work correctly,
767  * so we print out a message on the console and abort the device attach.
768  */
769
770 static int
771 re_diag(struct re_softc *sc)
772 {
773         struct ifnet *ifp = &sc->arpcom.ac_if;
774         struct mbuf *m0;
775         struct ether_header *eh;
776         struct re_desc *cur_rx;
777         uint16_t status;
778         uint32_t rxstat;
779         int total_len, i, error = 0, phyaddr;
780         uint8_t dst[ETHER_ADDR_LEN] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
781         uint8_t src[ETHER_ADDR_LEN] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
782
783         /* Allocate a single mbuf */
784
785         MGETHDR(m0, MB_DONTWAIT, MT_DATA);
786         if (m0 == NULL)
787                 return(ENOBUFS);
788
789         /*
790          * Initialize the NIC in test mode. This sets the chip up
791          * so that it can send and receive frames, but performs the
792          * following special functions:
793          * - Puts receiver in promiscuous mode
794          * - Enables digital loopback mode
795          * - Leaves interrupts turned off
796          */
797
798         ifp->if_flags |= IFF_PROMISC;
799         sc->re_flags |= RE_F_TESTMODE;
800         re_init(sc);
801         sc->re_flags |= RE_F_LINKED;
802         if (!RE_IS_8139CP(sc))
803                 phyaddr = 1;
804         else
805                 phyaddr = 0;
806
807         re_miibus_writereg(sc->re_dev, phyaddr, MII_BMCR, BMCR_RESET);
808         for (i = 0; i < RE_TIMEOUT; i++) {
809                 status = re_miibus_readreg(sc->re_dev, phyaddr, MII_BMCR);
810                 if (!(status & BMCR_RESET))
811                         break;
812         }
813
814         re_miibus_writereg(sc->re_dev, phyaddr, MII_BMCR, BMCR_LOOP);
815         CSR_WRITE_2(sc, RE_ISR, RE_INTRS_DIAG);
816
817         DELAY(100000);
818
819         /* Put some data in the mbuf */
820
821         eh = mtod(m0, struct ether_header *);
822         bcopy (dst, eh->ether_dhost, ETHER_ADDR_LEN);
823         bcopy (src, eh->ether_shost, ETHER_ADDR_LEN);
824         eh->ether_type = htons(ETHERTYPE_IP);
825         m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
826
827         /*
828          * Queue the packet, start transmission.
829          * Note: ifq_handoff() ultimately calls re_start() for us.
830          */
831
832         CSR_WRITE_2(sc, RE_ISR, 0xFFFF);
833         error = ifq_handoff(ifp, m0, NULL);
834         if (error) {
835                 m0 = NULL;
836                 goto done;
837         }
838         m0 = NULL;
839
840         /* Wait for it to propagate through the chip */
841
842         DELAY(100000);
843         for (i = 0; i < RE_TIMEOUT; i++) {
844                 status = CSR_READ_2(sc, RE_ISR);
845                 CSR_WRITE_2(sc, RE_ISR, status);
846                 if ((status & (RE_ISR_TIMEOUT_EXPIRED|RE_ISR_RX_OK)) ==
847                     (RE_ISR_TIMEOUT_EXPIRED|RE_ISR_RX_OK))
848                         break;
849                 DELAY(10);
850         }
851
852         if (i == RE_TIMEOUT) {
853                 if_printf(ifp, "diagnostic failed to receive packet "
854                           "in loopback mode\n");
855                 error = EIO;
856                 goto done;
857         }
858
859         /*
860          * The packet should have been dumped into the first
861          * entry in the RX DMA ring. Grab it from there.
862          */
863
864         bus_dmamap_sync(sc->re_ldata.re_rx_mtag, sc->re_ldata.re_rx_dmamap[0],
865                         BUS_DMASYNC_POSTREAD);
866         bus_dmamap_unload(sc->re_ldata.re_rx_mtag,
867                           sc->re_ldata.re_rx_dmamap[0]);
868
869         m0 = sc->re_ldata.re_rx_mbuf[0];
870         sc->re_ldata.re_rx_mbuf[0] = NULL;
871         eh = mtod(m0, struct ether_header *);
872
873         cur_rx = &sc->re_ldata.re_rx_list[0];
874         total_len = RE_RXBYTES(cur_rx);
875         rxstat = le32toh(cur_rx->re_cmdstat);
876
877         if (total_len != ETHER_MIN_LEN) {
878                 if_printf(ifp, "diagnostic failed, received short packet\n");
879                 error = EIO;
880                 goto done;
881         }
882
883         /* Test that the received packet data matches what we sent. */
884
885         if (bcmp(eh->ether_dhost, dst, ETHER_ADDR_LEN) ||
886             bcmp(eh->ether_shost, &src, ETHER_ADDR_LEN) ||
887             be16toh(eh->ether_type) != ETHERTYPE_IP) {
888                 if_printf(ifp, "WARNING, DMA FAILURE!\n");
889                 if_printf(ifp, "expected TX data: %6D/%6D/0x%x\n",
890                     dst, ":", src, ":", ETHERTYPE_IP);
891                 if_printf(ifp, "received RX data: %6D/%6D/0x%x\n",
892                     eh->ether_dhost, ":",  eh->ether_shost, ":",
893                     ntohs(eh->ether_type));
894                 if_printf(ifp, "You may have a defective 32-bit NIC plugged "
895                     "into a 64-bit PCI slot.\n");
896                 if_printf(ifp, "Please re-install the NIC in a 32-bit slot "
897                     "for proper operation.\n");
898                 if_printf(ifp, "Read the re(4) man page for more details.\n");
899                 error = EIO;
900         }
901
902 done:
903         /* Turn interface off, release resources */
904
905         sc->re_flags &= ~(RE_F_LINKED | RE_F_TESTMODE);
906         ifp->if_flags &= ~IFF_PROMISC;
907         re_stop(sc);
908         if (m0 != NULL)
909                 m_freem(m0);
910
911         return (error);
912 }
913 #endif  /* RE_DIAG */
914
915 /*
916  * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
917  * IDs against our list and return a device name if we find a match.
918  */
919 static int
920 re_probe(device_t dev)
921 {
922         const struct re_type *t;
923         const struct re_hwrev *hw_rev;
924         struct re_softc *sc;
925         int rid;
926         uint32_t hwrev, macmode, txcfg;
927         uint16_t vendor, product;
928
929         vendor = pci_get_vendor(dev);
930         product = pci_get_device(dev);
931
932         /*
933          * Only attach to rev.3 of the Linksys EG1032 adapter.
934          * Rev.2 is supported by sk(4).
935          */
936         if (vendor == PCI_VENDOR_LINKSYS &&
937             product == PCI_PRODUCT_LINKSYS_EG1032 &&
938             pci_get_subdevice(dev) != PCI_SUBDEVICE_LINKSYS_EG1032_REV3)
939                 return ENXIO;
940
941         if (vendor == PCI_VENDOR_REALTEK &&
942             product == PCI_PRODUCT_REALTEK_RT8139 &&
943             pci_get_revid(dev) != PCI_REVID_REALTEK_RT8139CP) {
944                 /* Poor 8139 */
945                 return ENXIO;
946         }
947
948         for (t = re_devs; t->re_name != NULL; t++) {
949                 if (product == t->re_did && vendor == t->re_vid)
950                         break;
951         }
952
953         /*
954          * Check if we found a RealTek device.
955          */
956         if (t->re_name == NULL)
957                 return ENXIO;
958
959         /*
960          * Temporarily map the I/O space so we can read the chip ID register.
961          */
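        /*
         * A scratch softc is allocated purely so the CSR_* macros have
         * a bus tag/handle to operate on; it is freed again once the
         * hardware revision has been read.
         */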
962         sc = kmalloc(sizeof(*sc), M_TEMP, M_WAITOK | M_ZERO);
963         rid = RE_PCI_LOIO;
964         sc->re_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
965                                             RF_ACTIVE);
966         if (sc->re_res == NULL) {
967                 device_printf(dev, "couldn't map ports/memory\n");
968                 kfree(sc, M_TEMP);
969                 return ENXIO;
970         }
971
972         sc->re_btag = rman_get_bustag(sc->re_res);
973         sc->re_bhandle = rman_get_bushandle(sc->re_res);
974
975         txcfg = CSR_READ_4(sc, RE_TXCFG);
976         hwrev = txcfg & RE_TXCFG_HWREV;
977         macmode = txcfg & RE_TXCFG_MACMODE;
978         bus_release_resource(dev, SYS_RES_IOPORT, RE_PCI_LOIO, sc->re_res);
979         kfree(sc, M_TEMP);
980
981         /*
982          * and continue matching for the specific chip...
983          */
984         for (hw_rev = re_hwrevs; hw_rev->re_hwrev != RE_HWREV_NULL; hw_rev++) {
985                 if (hw_rev->re_hwrev == hwrev) {
986                         sc = device_get_softc(dev);
987
988                         sc->re_hwrev = hw_rev->re_hwrev;
989                         sc->re_macver = hw_rev->re_macver;
990                         sc->re_caps = hw_rev->re_caps;
991                         sc->re_maxmtu = hw_rev->re_maxmtu;
992
993                         /*
994                          * Apply chip property fixup
995                          */
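                        /*
                         * Several hardware revisions share a revision ID;
                         * the MACMODE bits of TXCFG (read above) are used
                         * to tell the exact MAC versions apart.
                         */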
996                         switch (sc->re_hwrev) {
997                         case RE_HWREV_8101E1:
998                         case RE_HWREV_8101E2:
999                                 if (macmode == 0)
1000                                         sc->re_macver = RE_MACVER_11;
1001                                 else if (macmode == 0x200000)
1002                                         sc->re_macver = RE_MACVER_12;
1003                                 break;
1004                         case RE_HWREV_8102E:
1005                         case RE_HWREV_8102EL:
1006                                 if (macmode == 0)
1007                                         sc->re_macver = RE_MACVER_13;
1008                                 else if (macmode == 0x100000)
1009                                         sc->re_macver = RE_MACVER_14;
1010                                 break;
1011                         case RE_HWREV_8168B2:
1012                         case RE_HWREV_8168B3:
1013                                 if (macmode == 0)
1014                                         sc->re_macver = RE_MACVER_22;
1015                                 break;
1016                         case RE_HWREV_8168C:
1017                                 if (macmode == 0)
1018                                         sc->re_macver = RE_MACVER_24;
1019                                 else if (macmode == 0x200000)
1020                                         sc->re_macver = RE_MACVER_25;
1021                                 else if (macmode == 0x300000)
1022                                         sc->re_macver = RE_MACVER_27;
1023                                 break;
1024                         case RE_HWREV_8168CP:
1025                                 if (macmode == 0)
1026                                         sc->re_macver = RE_MACVER_26;
1027                                 else if (macmode == 0x100000)
1028                                         sc->re_macver = RE_MACVER_28;
1029                                 break;
1030                         case RE_HWREV_8168DP:
1031                                 if (macmode == 0)
1032                                         sc->re_macver = RE_MACVER_2B;
1033                                 else if (macmode == 0x200000)
1034                                         sc->re_macver = RE_MACVER_2C;
1035                                 break;
1036                         case RE_HWREV_8168E:
1037                                 if (macmode == 0x100000)
1038                                         sc->re_macver = RE_MACVER_2E;
1039                                 else if (macmode == 0x200000)
1040                                         sc->re_macver = RE_MACVER_2F;
1041                                 break;
1042                         case RE_HWREV_8168F:
1043                                 if (macmode == 0x000000)
1044                                         sc->re_macver = RE_MACVER_30;
1045                                 else if (macmode == 0x100000)
1046                                         sc->re_macver = RE_MACVER_31;
1047                                 break;
1048                         }
1049                         if (pci_is_pcie(dev))
1050                                 sc->re_caps |= RE_C_PCIE;
1051
1052                         device_set_desc(dev, t->re_name);
1053                         return 0;
1054                 }
1055         }
1056
1057         if (bootverbose) {
1058                 device_printf(dev, "unknown hwrev 0x%08x, macmode 0x%08x\n",
1059                               hwrev, macmode);
1060         }
1061         return ENXIO;
1062 }
1063
1064 static int
1065 re_allocmem(device_t dev)
1066 {
1067         struct re_softc *sc = device_get_softc(dev);
1068         bus_dmamem_t dmem;
1069         int error, i;
1070
1071         /*
1072          * Allocate list data
1073          */
1074         sc->re_ldata.re_tx_mbuf =
1075         kmalloc(sc->re_tx_desc_cnt * sizeof(struct mbuf *),
1076                 M_DEVBUF, M_ZERO | M_WAITOK);
1077
1078         sc->re_ldata.re_rx_mbuf =
1079         kmalloc(sc->re_rx_desc_cnt * sizeof(struct mbuf *),
1080                 M_DEVBUF, M_ZERO | M_WAITOK);
1081
1082         sc->re_ldata.re_rx_paddr =
1083         kmalloc(sc->re_rx_desc_cnt * sizeof(bus_addr_t),
1084                 M_DEVBUF, M_ZERO | M_WAITOK);
1085
1086         sc->re_ldata.re_tx_dmamap =
1087         kmalloc(sc->re_tx_desc_cnt * sizeof(bus_dmamap_t),
1088                 M_DEVBUF, M_ZERO | M_WAITOK);
1089
1090         sc->re_ldata.re_rx_dmamap =
1091         kmalloc(sc->re_rx_desc_cnt * sizeof(bus_dmamap_t),
1092                 M_DEVBUF, M_ZERO | M_WAITOK);
1093
1094         /*
1095          * Allocate the parent bus DMA tag appropriate for PCI.
1096          */
1097         error = bus_dma_tag_create(NULL,        /* parent */
1098                         1, 0,                   /* alignment, boundary */
1099                         BUS_SPACE_MAXADDR,      /* lowaddr */
1100                         BUS_SPACE_MAXADDR,      /* highaddr */
1101                         NULL, NULL,             /* filter, filterarg */
1102                         BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
1103                         0,                      /* nsegments */
1104                         BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
1105                         0,                      /* flags */
1106                         &sc->re_parent_tag);
1107         if (error) {
1108                 device_printf(dev, "could not allocate parent dma tag\n");
1109                 return error;
1110         }
1111
1112         /* Allocate TX descriptor list. */
1113         error = bus_dmamem_coherent(sc->re_parent_tag,
1114                         RE_RING_ALIGN, 0,
1115                         BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
1116                         RE_TX_LIST_SZ(sc), BUS_DMA_WAITOK | BUS_DMA_ZERO,
1117                         &dmem);
1118         if (error) {
1119                 device_printf(dev, "could not allocate TX ring\n");
1120                 return error;
1121         }
1122         sc->re_ldata.re_tx_list_tag = dmem.dmem_tag;
1123         sc->re_ldata.re_tx_list_map = dmem.dmem_map;
1124         sc->re_ldata.re_tx_list = dmem.dmem_addr;
1125         sc->re_ldata.re_tx_list_addr = dmem.dmem_busaddr;
1126
1127         /* Allocate RX descriptor list. */
1128         error = bus_dmamem_coherent(sc->re_parent_tag,
1129                         RE_RING_ALIGN, 0,
1130                         BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
1131                         RE_RX_LIST_SZ(sc), BUS_DMA_WAITOK | BUS_DMA_ZERO,
1132                         &dmem);
1133         if (error) {
1134                 device_printf(dev, "could not allocate RX ring\n");
1135                 return error;
1136         }
1137         sc->re_ldata.re_rx_list_tag = dmem.dmem_tag;
1138         sc->re_ldata.re_rx_list_map = dmem.dmem_map;
1139         sc->re_ldata.re_rx_list = dmem.dmem_addr;
1140         sc->re_ldata.re_rx_list_addr = dmem.dmem_busaddr;
1141
1142         /* Allocate maps for TX mbufs. */
1143         error = bus_dma_tag_create(sc->re_parent_tag,
1144                         1, 0,
1145                         BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
1146                         NULL, NULL,
1147                         RE_FRAMELEN_MAX, RE_MAXSEGS, MCLBYTES,
1148                         BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
1149                         &sc->re_ldata.re_tx_mtag);
1150         if (error) {
1151                 device_printf(dev, "could not allocate TX buf dma tag\n");
1152                 return(error);
1153         }
1154
1155         /* Create DMA maps for TX buffers */
1156         for (i = 0; i < sc->re_tx_desc_cnt; i++) {
1157                 error = bus_dmamap_create(sc->re_ldata.re_tx_mtag,
1158                                 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
1159                                 &sc->re_ldata.re_tx_dmamap[i]);
1160                 if (error) {
1161                         device_printf(dev, "can't create DMA map for TX buf\n");
1162                         re_freebufmem(sc, i, 0);
1163                         return(error);
1164                 }
1165         }
1166
1167         /* Allocate maps for RX mbufs. */
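        /*
         * Each RX buffer is mapped as a single segment (nsegments = 1)
         * aligned to RE_RXBUF_ALIGN, since every RX descriptor points
         * at exactly one contiguous buffer.
         */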
1168         error = bus_dma_tag_create(sc->re_parent_tag,
1169                         RE_RXBUF_ALIGN, 0,
1170                         BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
1171                         NULL, NULL,
1172                         MCLBYTES, 1, MCLBYTES,
1173                         BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ALIGNED,
1174                         &sc->re_ldata.re_rx_mtag);
1175         if (error) {
1176                 device_printf(dev, "could not allocate RX buf dma tag\n");
1177                 return(error);
1178         }
1179
1180         /* Create spare DMA map for RX */
1181         error = bus_dmamap_create(sc->re_ldata.re_rx_mtag, BUS_DMA_WAITOK,
1182                         &sc->re_ldata.re_rx_spare);
1183         if (error) {
1184                 device_printf(dev, "can't create spare DMA map for RX\n");
1185                 bus_dma_tag_destroy(sc->re_ldata.re_rx_mtag);
1186                 sc->re_ldata.re_rx_mtag = NULL;
1187                 return error;
1188         }
1189
1190         /* Create DMA maps for RX buffers */
1191         for (i = 0; i < sc->re_rx_desc_cnt; i++) {
1192                 error = bus_dmamap_create(sc->re_ldata.re_rx_mtag,
1193                                 BUS_DMA_WAITOK, &sc->re_ldata.re_rx_dmamap[i]);
1194                 if (error) {
1195                         device_printf(dev, "can't create DMA map for RX buf\n");
1196                         re_freebufmem(sc, sc->re_tx_desc_cnt, i);
1197                         return(error);
1198                 }
1199         }
1200
1201         /* Create jumbo buffer pool for RX if required */
1202         if (sc->re_caps & RE_C_CONTIGRX) {
1203                 error = re_jpool_alloc(sc);
1204                 if (error) {
1205                         re_jpool_free(sc);
1206                         /* Disable jumbo frame support */
1207                         sc->re_maxmtu = ETHERMTU;
1208                 }
1209         }
1210         return(0);
1211 }
1212
1213 static void
1214 re_freebufmem(struct re_softc *sc, int tx_cnt, int rx_cnt)
1215 {
1216         int i;
1217
1218         /* Destroy all the RX and TX buffer maps */
1219         if (sc->re_ldata.re_tx_mtag) {
1220                 for (i = 0; i < tx_cnt; i++) {
1221                         bus_dmamap_destroy(sc->re_ldata.re_tx_mtag,
1222                                            sc->re_ldata.re_tx_dmamap[i]);
1223                 }
1224                 bus_dma_tag_destroy(sc->re_ldata.re_tx_mtag);
1225                 sc->re_ldata.re_tx_mtag = NULL;
1226         }
1227
1228         if (sc->re_ldata.re_rx_mtag) {
1229                 for (i = 0; i < rx_cnt; i++) {
1230                         bus_dmamap_destroy(sc->re_ldata.re_rx_mtag,
1231                                            sc->re_ldata.re_rx_dmamap[i]);
1232                 }
1233                 bus_dmamap_destroy(sc->re_ldata.re_rx_mtag,
1234                                    sc->re_ldata.re_rx_spare);
1235                 bus_dma_tag_destroy(sc->re_ldata.re_rx_mtag);
1236                 sc->re_ldata.re_rx_mtag = NULL;
1237         }
1238 }
1239
1240 static void
1241 re_freemem(device_t dev)
1242 {
1243         struct re_softc *sc = device_get_softc(dev);
1244
1245         /* Unload and free the RX DMA ring memory and map */
1246         if (sc->re_ldata.re_rx_list_tag) {
1247                 bus_dmamap_unload(sc->re_ldata.re_rx_list_tag,
1248                                   sc->re_ldata.re_rx_list_map);
1249                 bus_dmamem_free(sc->re_ldata.re_rx_list_tag,
1250                                 sc->re_ldata.re_rx_list,
1251                                 sc->re_ldata.re_rx_list_map);
1252                 bus_dma_tag_destroy(sc->re_ldata.re_rx_list_tag);
1253         }
1254
1255         /* Unload and free the TX DMA ring memory and map */
1256         if (sc->re_ldata.re_tx_list_tag) {
1257                 bus_dmamap_unload(sc->re_ldata.re_tx_list_tag,
1258                                   sc->re_ldata.re_tx_list_map);
1259                 bus_dmamem_free(sc->re_ldata.re_tx_list_tag,
1260                                 sc->re_ldata.re_tx_list,
1261                                 sc->re_ldata.re_tx_list_map);
1262                 bus_dma_tag_destroy(sc->re_ldata.re_tx_list_tag);
1263         }
1264
1265         /* Free RX/TX buffer DMA resources */
1266         re_freebufmem(sc, sc->re_tx_desc_cnt, sc->re_rx_desc_cnt);
1267
1268         /* Unload and free the stats buffer and map */
1269         if (sc->re_ldata.re_stag) {
1270                 bus_dmamap_unload(sc->re_ldata.re_stag, sc->re_ldata.re_smap);
1271                 bus_dmamem_free(sc->re_ldata.re_stag,
1272                                 sc->re_ldata.re_stats,
1273                                 sc->re_ldata.re_smap);
1274                 bus_dma_tag_destroy(sc->re_ldata.re_stag);
1275         }
1276
1277         if (sc->re_caps & RE_C_CONTIGRX)
1278                 re_jpool_free(sc);
1279
1280         if (sc->re_parent_tag)
1281                 bus_dma_tag_destroy(sc->re_parent_tag);
1282
1283         if (sc->re_ldata.re_tx_mbuf != NULL)
1284                 kfree(sc->re_ldata.re_tx_mbuf, M_DEVBUF);
1285         if (sc->re_ldata.re_rx_mbuf != NULL)
1286                 kfree(sc->re_ldata.re_rx_mbuf, M_DEVBUF);
1287         if (sc->re_ldata.re_rx_paddr != NULL)
1288                 kfree(sc->re_ldata.re_rx_paddr, M_DEVBUF);
1289         if (sc->re_ldata.re_tx_dmamap != NULL)
1290                 kfree(sc->re_ldata.re_tx_dmamap, M_DEVBUF);
1291         if (sc->re_ldata.re_rx_dmamap != NULL)
1292                 kfree(sc->re_ldata.re_rx_dmamap, M_DEVBUF);
1293 }
1294
1295 /*
1296  * Attach the interface. Allocate softc structures, do ifmedia
1297  * setup and ethernet/BPF attach.
1298  */
1299 static int
1300 re_attach(device_t dev)
1301 {
1302         struct re_softc *sc = device_get_softc(dev);
1303         struct ifnet *ifp;
1304         uint8_t eaddr[ETHER_ADDR_LEN];
1305         int error = 0, rid, qlen;
1306
1307         callout_init(&sc->re_timer);
1308         sc->re_dev = dev;
1309
1310         if (RE_IS_8139CP(sc)) {
1311                 sc->re_rx_desc_cnt = RE_RX_DESC_CNT_8139CP;
1312                 sc->re_tx_desc_cnt = RE_TX_DESC_CNT_8139CP;
1313         } else {
1314                 sc->re_rx_desc_cnt = re_rx_desc_count;
1315                 if (sc->re_rx_desc_cnt > RE_RX_DESC_CNT_MAX)
1316                         sc->re_rx_desc_cnt = RE_RX_DESC_CNT_MAX;
1317
1318                 sc->re_tx_desc_cnt = re_tx_desc_count;
1319                 if (sc->re_tx_desc_cnt > RE_TX_DESC_CNT_MAX)
1320                         sc->re_tx_desc_cnt = RE_TX_DESC_CNT_MAX;
1321         }
1322
1323         qlen = RE_IFQ_MAXLEN;
1324         if (sc->re_tx_desc_cnt > qlen)
1325                 qlen = sc->re_tx_desc_cnt;
1326
1327         sc->re_rxbuf_size = MCLBYTES;
1328         sc->re_newbuf = re_newbuf_std;
1329
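        /*
         * Default interrupt moderation settings.  The hardware timers
         * (re_tx_time/re_rx_time) count in 25us units, the simulated
         * timer (re_sim_time) in plain microseconds; see the sysctl
         * handlers registered below.
         */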
1330         sc->re_tx_time = 5;             /* 125us */
1331         sc->re_rx_time = 2;             /* 50us */
1332         if (sc->re_caps & RE_C_PCIE)
1333                 sc->re_sim_time = 75;   /* 75us */
1334         else
1335                 sc->re_sim_time = 125;  /* 125us */
1336         if (!RE_IS_8139CP(sc)) {
1337                 /* simulated interrupt moderation */
1338                 sc->re_imtype = RE_IMTYPE_SIM;
1339         } else {
1340                 sc->re_imtype = RE_IMTYPE_NONE;
1341         }
1342         re_config_imtype(sc, sc->re_imtype);
1343
1344         sysctl_ctx_init(&sc->re_sysctl_ctx);
1345         sc->re_sysctl_tree = SYSCTL_ADD_NODE(&sc->re_sysctl_ctx,
1346                                              SYSCTL_STATIC_CHILDREN(_hw),
1347                                              OID_AUTO,
1348                                              device_get_nameunit(dev),
1349                                              CTLFLAG_RD, 0, "");
1350         if (sc->re_sysctl_tree == NULL) {
1351                 device_printf(dev, "can't add sysctl node\n");
1352                 error = ENXIO;
1353                 goto fail;
1354         }
1355         SYSCTL_ADD_INT(&sc->re_sysctl_ctx,
1356                        SYSCTL_CHILDREN(sc->re_sysctl_tree), OID_AUTO,
1357                        "rx_desc_count", CTLFLAG_RD, &sc->re_rx_desc_cnt,
1358                        0, "RX desc count");
1359         SYSCTL_ADD_INT(&sc->re_sysctl_ctx,
1360                        SYSCTL_CHILDREN(sc->re_sysctl_tree), OID_AUTO,
1361                        "tx_desc_count", CTLFLAG_RD, &sc->re_tx_desc_cnt,
1362                        0, "TX desc count");
1363         SYSCTL_ADD_PROC(&sc->re_sysctl_ctx,
1364                         SYSCTL_CHILDREN(sc->re_sysctl_tree),
1365                         OID_AUTO, "sim_time",
1366                         CTLTYPE_INT | CTLFLAG_RW,
1367                         sc, 0, re_sysctl_simtime, "I",
1368                         "Simulated interrupt moderation time (usec).");
1369         SYSCTL_ADD_PROC(&sc->re_sysctl_ctx,
1370                         SYSCTL_CHILDREN(sc->re_sysctl_tree),
1371                         OID_AUTO, "imtype",
1372                         CTLTYPE_INT | CTLFLAG_RW,
1373                         sc, 0, re_sysctl_imtype, "I",
1374                         "Interrupt moderation type -- "
1375                         "0:disable, 1:simulated, "
1376                         "2:hardware(if supported)");
1377         if (sc->re_caps & RE_C_HWIM) {
1378                 SYSCTL_ADD_PROC(&sc->re_sysctl_ctx,
1379                                 SYSCTL_CHILDREN(sc->re_sysctl_tree),
1380                                 OID_AUTO, "hw_rxtime",
1381                                 CTLTYPE_INT | CTLFLAG_RW,
1382                                 sc, 0, re_sysctl_rxtime, "I",
1383                                 "Hardware interrupt moderation time "
1384                                 "(unit: 25usec).");
1385                 SYSCTL_ADD_PROC(&sc->re_sysctl_ctx,
1386                                 SYSCTL_CHILDREN(sc->re_sysctl_tree),
1387                                 OID_AUTO, "hw_txtime",
1388                                 CTLTYPE_INT | CTLFLAG_RW,
1389                                 sc, 0, re_sysctl_txtime, "I",
1390                                 "Hardware interrupt moderation time "
1391                                 "(unit: 25usec).");
1392         }
1393
1394 #ifndef BURN_BRIDGES
1395         /*
1396          * Handle power management nonsense.
1397          */
1398
1399         if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
1400                 uint32_t membase, irq;
1401
1402                 /* Save important PCI config data. */
1403                 membase = pci_read_config(dev, RE_PCI_LOMEM, 4);
1404                 irq = pci_read_config(dev, PCIR_INTLINE, 4);
1405
1406                 /* Reset the power state. */
1407                 device_printf(dev, "chip is in D%d power mode "
1408                     "-- setting to D0\n", pci_get_powerstate(dev));
1409
1410                 pci_set_powerstate(dev, PCI_POWERSTATE_D0);
1411
1412                 /* Restore PCI config data. */
1413                 pci_write_config(dev, RE_PCI_LOMEM, membase, 4);
1414                 pci_write_config(dev, PCIR_INTLINE, irq, 4);
1415         }
1416 #endif
1417         /*
1418          * Map control/status registers.
1419          */
1420         pci_enable_busmaster(dev);
1421
1422         rid = RE_PCI_LOIO;
1423         sc->re_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
1424                                             RF_ACTIVE);
1425
1426         if (sc->re_res == NULL) {
1427                 device_printf(dev, "couldn't map ports\n");
1428                 error = ENXIO;
1429                 goto fail;
1430         }
1431
1432         sc->re_btag = rman_get_bustag(sc->re_res);
1433         sc->re_bhandle = rman_get_bushandle(sc->re_res);
1434
1435         /* Allocate interrupt */
1436         rid = 0;
1437         sc->re_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1438                                             RF_SHAREABLE | RF_ACTIVE);
1439
1440         if (sc->re_irq == NULL) {
1441                 device_printf(dev, "couldn't map interrupt\n");
1442                 error = ENXIO;
1443                 goto fail;
1444         }
1445
1446         /* Reset the adapter. */
1447         re_reset(sc, 0);
1448
1449         if (RE_IS_8139CP(sc)) {
1450                 sc->re_bus_speed = 33; /* XXX */
1451         } else if (sc->re_caps & RE_C_PCIE) {
1452                 sc->re_bus_speed = 125;
1453         } else {
1454                 uint8_t cfg2;
1455
1456                 cfg2 = CSR_READ_1(sc, RE_CFG2);
1457                 switch (cfg2 & RE_CFG2_PCICLK_MASK) {
1458                 case RE_CFG2_PCICLK_33MHZ:
1459                         sc->re_bus_speed = 33;
1460                         break;
1461                 case RE_CFG2_PCICLK_66MHZ:
1462                         sc->re_bus_speed = 66;
1463                         break;
1464                 default:
1465                         device_printf(dev, "unknown bus speed, assuming 33MHz\n");
1466                         sc->re_bus_speed = 33;
1467                         break;
1468                 }
1469                 if (cfg2 & RE_CFG2_PCI64)
1470                         sc->re_caps |= RE_C_PCI64;
1471         }
1472         device_printf(dev, "Hardware rev. 0x%08x; MAC ver. 0x%02x; "
1473                       "PCI%s %dMHz\n",
1474                       sc->re_hwrev, sc->re_macver,
1475                       (sc->re_caps & RE_C_PCIE) ?
1476                       "-E" : ((sc->re_caps & RE_C_PCI64) ? "64" : "32"),
1477                       sc->re_bus_speed);
1478
1479         /*
1480          * NOTE:
1481          * DO NOT try the config1 and config5 adjustments that appear in
1482          * Realtek's Linux drivers.  They will _permanently_ damage certain
1483          * cards' EEPROM, e.g. one of my 8168B (0x38000000) cards ...
1484          */
1485
1486         re_get_eaddr(sc, eaddr);
1487
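        /*
         * The gigE chips use a wider fragment-length mask in the RX
         * descriptors and a different TX start (poll request) register
         * than the 8139C+.
         */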
1488         if (!RE_IS_8139CP(sc)) {
1489                 /* Set RX length mask */
1490                 sc->re_rxlenmask = RE_RDESC_STAT_GFRAGLEN;
1491                 sc->re_txstart = RE_GTXSTART;
1492         } else {
1493                 /* Set RX length mask */
1494                 sc->re_rxlenmask = RE_RDESC_STAT_FRAGLEN;
1495                 sc->re_txstart = RE_TXSTART;
1496         }
1497
1498         /* Allocate DMA resources */
1499         error = re_allocmem(dev);
1500         if (error)
1501                 goto fail;
1502
1503         /*
1504          * Apply some magic PCI settings from Realtek ...
1505          */
1506         if (RE_IS_8169(sc)) {
1507                 CSR_WRITE_1(sc, 0x82, 1);
1508                 pci_write_config(dev, PCIR_CACHELNSZ, 0x8, 1);
1509         }
1510         pci_write_config(dev, PCIR_LATTIMER, 0x40, 1);
1511
1512         if (sc->re_caps & RE_C_MAC2) {
1513                 /*
1514                  * The following part is taken from Realtek's BSD driver v176.
1515                  * However, it does _not_ make much (or any) sense:
1516                  * the 8168C's PCI Express device control register is at 0x78,
1517                  * so reading 0x79 (the upper byte of 0x78) and setting
1518                  * bits 4~6 is meant to enlarge the "max read request size"
1519                  * (which we do elsewhere).  That value has no meaning for
1520                  * other PCI registers, so writing it to 0x54 is most likely
1521                  * simply wrong.
1522                  * 0x80 is the lower byte of the PCI Express device status;
1523                  * its non-reserved bits are RW1C, so writing 0 to them has
1524                  * no effect at all.
1525                  */
1526 #ifdef foo
1527                 uint8_t val;
1528
1529                 val = pci_read_config(dev, 0x79, 1);
1530                 val = (val & ~0x70) | 0x50;
1531                 pci_write_config(dev, 0x54, val, 1);
1532                 pci_write_config(dev, 0x80, 0, 1);
1533 #endif
1534         }
1535
1536         /*
1537          * Apply some PHY fixup from Realtek ...
1538          */
1539         if (sc->re_hwrev == RE_HWREV_8110S) {
1540                 CSR_WRITE_1(sc, 0x82, 1);
1541                 re_miibus_writereg(dev, 1, 0xb, 0);
1542         }
1543         if (sc->re_caps & RE_C_PHYPMGT) {
1544                 /* Power up PHY */
1545                 re_miibus_writereg(dev, 1, 0x1f, 0);
1546                 re_miibus_writereg(dev, 1, 0xe, 0);
1547         }
1548
1549         /* Do MII setup */
1550         if (mii_phy_probe(dev, &sc->re_miibus,
1551             re_ifmedia_upd, re_ifmedia_sts)) {
1552                 device_printf(dev, "MII without any phy!\n");
1553                 error = ENXIO;
1554                 goto fail;
1555         }
1556
1557         ifp = &sc->arpcom.ac_if;
1558         ifp->if_softc = sc;
1559         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1560         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1561         ifp->if_ioctl = re_ioctl;
1562         ifp->if_start = re_start;
1563 #ifdef DEVICE_POLLING
1564         ifp->if_poll = re_poll;
1565 #endif
1566         ifp->if_watchdog = re_watchdog;
1567         ifp->if_init = re_init;
1568         if (!RE_IS_8139CP(sc)) /* XXX */
1569                 ifp->if_baudrate = 1000000000;
1570         else
1571                 ifp->if_baudrate = 100000000;
1572         ifq_set_maxlen(&ifp->if_snd, qlen);
1573         ifq_set_ready(&ifp->if_snd);
1574
1575         ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1576         if (sc->re_caps & RE_C_HWCSUM)
1577                 ifp->if_capabilities |= IFCAP_HWCSUM;
1578
1579         ifp->if_capenable = ifp->if_capabilities;
1580         if (ifp->if_capabilities & IFCAP_HWCSUM)
1581                 ifp->if_hwassist = RE_CSUM_FEATURES;
1582         else
1583                 ifp->if_hwassist = 0;
1584
1585         /*
1586          * Call MI attach routine.
1587          */
1588         ether_ifattach(ifp, eaddr, NULL);
1589
1590 #ifdef RE_DIAG
1591         /*
1592          * Perform hardware diagnostic on the original RTL8169.
1593          * Some 32-bit cards were incorrectly wired and would
1594          * malfunction if plugged into a 64-bit slot.
1595          */
1596         if (sc->re_hwrev == RE_HWREV_8169) {
1597                 lwkt_serialize_enter(ifp->if_serializer);
1598                 error = re_diag(sc);
1599                 lwkt_serialize_exit(ifp->if_serializer);
1600
1601                 if (error) {
1602                         device_printf(dev, "hardware diagnostic failure\n");
1603                         ether_ifdetach(ifp);
1604                         goto fail;
1605                 }
1606         }
1607 #endif  /* RE_DIAG */
1608
1609         /* Hook interrupt last to avoid having to lock softc */
1610         error = bus_setup_intr(dev, sc->re_irq, INTR_MPSAFE, re_intr, sc,
1611                                &sc->re_intrhand, ifp->if_serializer);
1612
1613         if (error) {
1614                 device_printf(dev, "couldn't set up irq\n");
1615                 ether_ifdetach(ifp);
1616                 goto fail;
1617         }
1618
1619         ifp->if_cpuid = ithread_cpuid(rman_get_start(sc->re_irq));
1620         KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus);
1621
1622 fail:
1623         if (error)
1624                 re_detach(dev);
1625
1626         return (error);
1627 }
1628
1629 /*
1630  * Shutdown hardware and free up resources. This can be called any
1631  * time after the mutex has been initialized. It is called in both
1632  * the error case in attach and the normal detach case so it needs
1633  * to be careful about only freeing resources that have actually been
1634  * allocated.
1635  */
1636 static int
1637 re_detach(device_t dev)
1638 {
1639         struct re_softc *sc = device_get_softc(dev);
1640         struct ifnet *ifp = &sc->arpcom.ac_if;
1641
1642         /* These should only be active if attach succeeded */
1643         if (device_is_attached(dev)) {
1644                 lwkt_serialize_enter(ifp->if_serializer);
1645                 re_stop(sc);
1646                 bus_teardown_intr(dev, sc->re_irq, sc->re_intrhand);
1647                 lwkt_serialize_exit(ifp->if_serializer);
1648
1649                 ether_ifdetach(ifp);
1650         }
1651         if (sc->re_miibus)
1652                 device_delete_child(dev, sc->re_miibus);
1653         bus_generic_detach(dev);
1654
1655         if (sc->re_sysctl_tree != NULL)
1656                 sysctl_ctx_free(&sc->re_sysctl_ctx);
1657
1658         if (sc->re_irq)
1659                 bus_release_resource(dev, SYS_RES_IRQ, 0, sc->re_irq);
1660         if (sc->re_res) {
1661                 bus_release_resource(dev, SYS_RES_IOPORT, RE_PCI_LOIO,
1662                                      sc->re_res);
1663         }
1664
1665         /* Free DMA resources */
1666         re_freemem(dev);
1667
1668         return(0);
1669 }
1670
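/*
 * Program RX descriptor 'idx' with the buffer physical address recorded in
 * re_rx_paddr[] and hand it back to the chip by setting the OWN bit; the
 * last descriptor in the ring also gets the end-of-ring (EOR) bit.
 */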
1671 static void
1672 re_setup_rxdesc(struct re_softc *sc, int idx)
1673 {
1674         bus_addr_t paddr;
1675         uint32_t cmdstat;
1676         struct re_desc *d;
1677
1678         paddr = sc->re_ldata.re_rx_paddr[idx];
1679         d = &sc->re_ldata.re_rx_list[idx];
1680
1681         d->re_bufaddr_lo = htole32(RE_ADDR_LO(paddr));
1682         d->re_bufaddr_hi = htole32(RE_ADDR_HI(paddr));
1683
1684         cmdstat = sc->re_rxbuf_size | RE_RDESC_CMD_OWN;
1685         if (idx == (sc->re_rx_desc_cnt - 1))
1686                 cmdstat |= RE_RDESC_CMD_EOR;
1687         d->re_cmdstat = htole32(cmdstat);
1688 }
1689
1690 static int
1691 re_newbuf_std(struct re_softc *sc, int idx, int init)
1692 {
1693         bus_dma_segment_t seg;
1694         bus_dmamap_t map;
1695         struct mbuf *m;
1696         int error, nsegs;
1697
1698         m = m_getcl(init ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR);
1699         if (m == NULL) {
1700                 error = ENOBUFS;
1701
1702                 if (init) {
1703                         if_printf(&sc->arpcom.ac_if, "m_getcl failed\n");
1704                         return error;
1705                 } else {
1706                         goto back;
1707                 }
1708         }
1709         m->m_len = m->m_pkthdr.len = MCLBYTES;
1710
1711         /*
1712          * NOTE:
1713          * re(4) chips need the address of the receive buffer to be 8-byte
1714          * aligned, so don't call m_adj(m, ETHER_ALIGN) here.
1715          */
1716
1717         error = bus_dmamap_load_mbuf_segment(sc->re_ldata.re_rx_mtag,
1718                         sc->re_ldata.re_rx_spare, m,
1719                         &seg, 1, &nsegs, BUS_DMA_NOWAIT);
1720         if (error) {
1721                 m_freem(m);
1722                 if (init) {
1723                         if_printf(&sc->arpcom.ac_if, "can't load RX mbuf\n");
1724                         return error;
1725                 } else {
1726                         goto back;
1727                 }
1728         }
1729
1730         if (!init) {
1731                 bus_dmamap_sync(sc->re_ldata.re_rx_mtag,
1732                                 sc->re_ldata.re_rx_dmamap[idx],
1733                                 BUS_DMASYNC_POSTREAD);
1734                 bus_dmamap_unload(sc->re_ldata.re_rx_mtag,
1735                                   sc->re_ldata.re_rx_dmamap[idx]);
1736         }
1737         sc->re_ldata.re_rx_mbuf[idx] = m;
1738         sc->re_ldata.re_rx_paddr[idx] = seg.ds_addr;
1739
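        /*
         * Swap the freshly loaded spare DMA map into this ring slot; the
         * slot's old map becomes the new spare.  Loading into the spare
         * first means a failed load above never leaves the slot without
         * a mapped buffer.
         */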
1740         map = sc->re_ldata.re_rx_dmamap[idx];
1741         sc->re_ldata.re_rx_dmamap[idx] = sc->re_ldata.re_rx_spare;
1742         sc->re_ldata.re_rx_spare = map;
1743 back:
1744         re_setup_rxdesc(sc, idx);
1745         return error;
1746 }
1747
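/*
 * Jumbo variant of re_newbuf_std(): instead of a standard cluster, attach
 * a buffer from the driver's private jumbo pool (re_jbuf) to the mbuf as
 * external storage.  The jumbo buffer is already DMA-mapped, so only its
 * recorded physical address (jbuf->re_paddr) needs to be stored.
 */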
1748 static int
1749 re_newbuf_jumbo(struct re_softc *sc, int idx, int init)
1750 {
1751         struct mbuf *m;
1752         struct re_jbuf *jbuf;
1753         int error = 0;
1754
1755         MGETHDR(m, init ? MB_WAIT : MB_DONTWAIT, MT_DATA);
1756         if (m == NULL) {
1757                 error = ENOBUFS;
1758                 if (init) {
1759                         if_printf(&sc->arpcom.ac_if, "MGETHDR failed\n");
1760                         return error;
1761                 } else {
1762                         goto back;
1763                 }
1764         }
1765
1766         jbuf = re_jbuf_alloc(sc);
1767         if (jbuf == NULL) {
1768                 m_freem(m);
1769
1770                 error = ENOBUFS;
1771                 if (init) {
1772                         if_printf(&sc->arpcom.ac_if, "jpool is empty\n");
1773                         return error;
1774                 } else {
1775                         goto back;
1776                 }
1777         }
1778
1779         m->m_ext.ext_arg = jbuf;
1780         m->m_ext.ext_buf = jbuf->re_buf;
1781         m->m_ext.ext_free = re_jbuf_free;
1782         m->m_ext.ext_ref = re_jbuf_ref;
1783         m->m_ext.ext_size = sc->re_rxbuf_size;
1784
1785         m->m_data = m->m_ext.ext_buf;
1786         m->m_flags |= M_EXT;
1787         m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1788
1789         /*
1790          * NOTE:
1791          * Some re(4) chips (e.g. RTL8101E) need the address of the receive buffer
1792          * to be 8-byte aligned, so don't call m_adj(m, ETHER_ALIGN) here.
1793          */
1794
1795         sc->re_ldata.re_rx_mbuf[idx] = m;
1796         sc->re_ldata.re_rx_paddr[idx] = jbuf->re_paddr;
1797 back:
1798         re_setup_rxdesc(sc, idx);
1799         return error;
1800 }
1801
1802 static int
1803 re_tx_list_init(struct re_softc *sc)
1804 {
1805         bzero(sc->re_ldata.re_tx_list, RE_TX_LIST_SZ(sc));
1806
1807         sc->re_ldata.re_tx_prodidx = 0;
1808         sc->re_ldata.re_tx_considx = 0;
1809         sc->re_ldata.re_tx_free = sc->re_tx_desc_cnt;
1810
1811         return(0);
1812 }
1813
1814 static int
1815 re_rx_list_init(struct re_softc *sc)
1816 {
1817         int i, error;
1818
1819         bzero(sc->re_ldata.re_rx_list, RE_RX_LIST_SZ(sc));
1820
1821         for (i = 0; i < sc->re_rx_desc_cnt; i++) {
1822                 error = sc->re_newbuf(sc, i, 1);
1823                 if (error)
1824                         return(error);
1825         }
1826
1827         sc->re_ldata.re_rx_prodidx = 0;
1828         sc->re_head = sc->re_tail = NULL;
1829
1830         return(0);
1831 }
1832
1833 #define RE_IP4_PACKET   0x1
1834 #define RE_TCP_PACKET   0x2
1835 #define RE_UDP_PACKET   0x4
1836
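/*
 * Classify a received frame from the RX descriptor status/control words.
 * MAC2-type chips report the IPv4 protocol ID in the control word
 * (re_control); older chips use the status word.  TCP/UDP classification
 * comes from the status word on both.
 */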
1837 static __inline uint8_t
1838 re_packet_type(struct re_softc *sc, uint32_t rxstat, uint32_t rxctrl)
1839 {
1840         uint8_t packet_type = 0;
1841
1842         if (sc->re_caps & RE_C_MAC2) {
1843                 if (rxctrl & RE_RDESC_CTL_PROTOIP4)
1844                         packet_type |= RE_IP4_PACKET;
1845         } else {
1846                 if (rxstat & RE_RDESC_STAT_PROTOID)
1847                         packet_type |= RE_IP4_PACKET;
1848         }
1849         if (RE_TCPPKT(rxstat))
1850                 packet_type |= RE_TCP_PACKET;
1851         else if (RE_UDPPKT(rxstat))
1852                 packet_type |= RE_UDP_PACKET;
1853         return packet_type;
1854 }
1855
1856 /*
1857  * RX handler for C+ and 8169. For the gigE chips, we support
1858  * the reception of jumbo frames that have been fragmented
1859  * across multiple 2K mbuf cluster buffers.
1860  */
1861 static int
1862 re_rxeof(struct re_softc *sc)
1863 {
1864         struct ifnet *ifp = &sc->arpcom.ac_if;
1865         struct mbuf *m;
1866         struct re_desc  *cur_rx;
1867         uint32_t rxstat, rxctrl;
1868         int i, total_len, rx = 0;
1869         struct mbuf_chain chain[MAXCPU];
1870
1871         ether_input_chain_init(chain);
1872
1873         for (i = sc->re_ldata.re_rx_prodidx;
1874              RE_OWN(&sc->re_ldata.re_rx_list[i]) == 0; RE_RXDESC_INC(sc, i)) {
1875                 cur_rx = &sc->re_ldata.re_rx_list[i];
1876                 m = sc->re_ldata.re_rx_mbuf[i];
1877                 total_len = RE_RXBYTES(cur_rx);
1878                 rxstat = le32toh(cur_rx->re_cmdstat);
1879                 rxctrl = le32toh(cur_rx->re_control);
1880
1881                 rx = 1;
1882
1883 #ifdef INVARIANTS
1884                 if (sc->re_flags & RE_F_USE_JPOOL)
1885                         KKASSERT(rxstat & RE_RDESC_STAT_EOF);
1886 #endif
1887
1888                 if ((rxstat & RE_RDESC_STAT_EOF) == 0) {
1889                         if (sc->re_flags & RE_F_DROP_RXFRAG) {
1890                                 re_setup_rxdesc(sc, i);
1891                                 continue;
1892                         }
1893
1894                         if (sc->re_newbuf(sc, i, 0)) {
1895                                 /* Drop upcoming fragments */
1896                                 sc->re_flags |= RE_F_DROP_RXFRAG;
1897                                 continue;
1898                         }
1899
1900                         m->m_len = MCLBYTES;
1901                         if (sc->re_head == NULL) {
1902                                 sc->re_head = sc->re_tail = m;
1903                         } else {
1904                                 sc->re_tail->m_next = m;
1905                                 sc->re_tail = m;
1906                         }
1907                         continue;
1908                 } else if (sc->re_flags & RE_F_DROP_RXFRAG) {
1909                         /*
1910                          * Last fragment of a multi-fragment packet.
1911                          *
1912                          * Since an error already happened, this fragment
1913                          * must be dropped along with the rest of the chain.
1914                          */
1915                         re_setup_rxdesc(sc, i);
1916                         re_free_rxchain(sc);
1917                         sc->re_flags &= ~RE_F_DROP_RXFRAG;
1918                         continue;
1919                 }
1920
1921                 /*
1922                  * NOTE: for the 8139C+, the frame length field
1923                  * is always 12 bits in size, but for the gigE chips,
1924                  * it is 13 bits (since the max RX frame length is 16K).
1925                  * Unfortunately, all 32 bits in the status word
1926                  * were already used, so to make room for the extra
1927                  * length bit, RealTek took out the 'frame alignment
1928                  * error' bit and shifted the other status bits
1929                  * over one slot. The OWN, EOR, FS and LS bits are
1930                  * still in the same places. We have already extracted
1931                  * the frame length and checked the OWN bit, so rather
1932                  * than using an alternate bit mapping, we shift the
1933                  * status bits one space to the right so we can evaluate
1934                  * them using the 8169 status as though it was in the
1935                  * same format as that of the 8139C+.
1936                  */
1937                 if (!RE_IS_8139CP(sc))
1938                         rxstat >>= 1;
1939
1940                 if (rxstat & RE_RDESC_STAT_RXERRSUM) {
1941                         ifp->if_ierrors++;
1942                         /*
1943                          * If this is part of a multi-fragment packet,
1944                          * discard all the pieces.
1945                          */
1946                         re_free_rxchain(sc);
1947                         re_setup_rxdesc(sc, i);
1948                         continue;
1949                 }
1950
1951                 /*
1952                  * If allocating a replacement mbuf fails,
1953                  * reload the current one.
1954                  */
1955
1956                 if (sc->re_newbuf(sc, i, 0)) {
1957                         ifp->if_ierrors++;
1958                         continue;
1959                 }
1960
1961                 if (sc->re_head != NULL) {
1962                         m->m_len = total_len % MCLBYTES;
1963                         /*
1964                          * Special case: if there are 4 bytes or less
1965                          * in this buffer, the mbuf can be discarded:
1966                          * the last 4 bytes are the CRC, which we don't
1967                          * care about anyway.
1968                          */
1969                         if (m->m_len <= ETHER_CRC_LEN) {
1970                                 sc->re_tail->m_len -=
1971                                     (ETHER_CRC_LEN - m->m_len);
1972                                 m_freem(m);
1973                         } else {
1974                                 m->m_len -= ETHER_CRC_LEN;
1975                                 sc->re_tail->m_next = m;
1976                         }
1977                         m = sc->re_head;
1978                         sc->re_head = sc->re_tail = NULL;
1979                         m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1980                 } else {
1981                         m->m_pkthdr.len = m->m_len =
1982                             (total_len - ETHER_CRC_LEN);
1983                 }
1984
1985                 ifp->if_ipackets++;
1986                 m->m_pkthdr.rcvif = ifp;
1987
1988                 /* Do RX checksumming if enabled */
1989
1990                 if (ifp->if_capenable & IFCAP_RXCSUM) {
1991                         uint8_t packet_type;
1992
1993                         packet_type = re_packet_type(sc, rxstat, rxctrl);
1994
1995                         /* Check IP header checksum */
1996                         if (packet_type & RE_IP4_PACKET) {
1997                                 m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
1998                                 if ((rxstat & RE_RDESC_STAT_IPSUMBAD) == 0)
1999                                         m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2000                         }
2001
2002                         /* Check TCP/UDP checksum */
2003                         if (((packet_type & RE_TCP_PACKET) &&
2004                              (rxstat & RE_RDESC_STAT_TCPSUMBAD) == 0) ||
2005                             ((packet_type & RE_UDP_PACKET) &&
2006                              (rxstat & RE_RDESC_STAT_UDPSUMBAD) == 0)) {
2007                                 m->m_pkthdr.csum_flags |=
2008                                     CSUM_DATA_VALID|CSUM_PSEUDO_HDR|
2009                                     CSUM_FRAG_NOT_CHECKED;
2010                                 m->m_pkthdr.csum_data = 0xffff;
2011                         }
2012                 }
2013
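                /*
                 * Extract the hardware-stripped VLAN tag; the chip stores
                 * it big-endian in the descriptor control word.
                 */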
2014                 if (rxctrl & RE_RDESC_CTL_HASTAG) {
2015                         m->m_flags |= M_VLANTAG;
2016                         m->m_pkthdr.ether_vlantag =
2017                                 be16toh((rxctrl & RE_RDESC_CTL_TAGDATA));
2018                 }
2019                 ether_input_chain(ifp, m, NULL, chain);
2020         }
2021
2022         ether_input_dispatch(chain);
2023
2024         sc->re_ldata.re_rx_prodidx = i;
2025
2026         return rx;
2027 }
2028
2029 #undef RE_IP4_PACKET
2030 #undef RE_TCP_PACKET
2031 #undef RE_UDP_PACKET
2032
2033 static int
2034 re_tx_collect(struct re_softc *sc)
2035 {
2036         struct ifnet *ifp = &sc->arpcom.ac_if;
2037         uint32_t txstat;
2038         int idx, tx = 0;
2039
2040         for (idx = sc->re_ldata.re_tx_considx;
2041              sc->re_ldata.re_tx_free < sc->re_tx_desc_cnt;
2042              RE_TXDESC_INC(sc, idx)) {
2043                 txstat = le32toh(sc->re_ldata.re_tx_list[idx].re_cmdstat);
2044                 if (txstat & RE_TDESC_CMD_OWN)
2045                         break;
2046
2047                 tx = 1;
2048
2049                 sc->re_ldata.re_tx_list[idx].re_bufaddr_lo = 0;
2050
2051                 /*
2052                  * We only stash mbufs in the last descriptor
2053                  * in a fragment chain, which also happens to
2054                  * be the only place where the TX status bits
2055                  * are valid.
2056                  */
2057                 if (txstat & RE_TDESC_CMD_EOF) {
2058                         bus_dmamap_unload(sc->re_ldata.re_tx_mtag,
2059                             sc->re_ldata.re_tx_dmamap[idx]);
2060                         m_freem(sc->re_ldata.re_tx_mbuf[idx]);
2061                         sc->re_ldata.re_tx_mbuf[idx] = NULL;
2062                         if (txstat & (RE_TDESC_STAT_EXCESSCOL|
2063                             RE_TDESC_STAT_COLCNT))
2064                                 ifp->if_collisions++;
2065                         if (txstat & RE_TDESC_STAT_TXERRSUM)
2066                                 ifp->if_oerrors++;
2067                         else
2068                                 ifp->if_opackets++;
2069                 }
2070                 sc->re_ldata.re_tx_free++;
2071         }
2072         sc->re_ldata.re_tx_considx = idx;
2073
2074         return tx;
2075 }
2076
2077 static int
2078 re_txeof(struct re_softc *sc)
2079 {
2080         struct ifnet *ifp = &sc->arpcom.ac_if;
2081         int tx;
2082
2083         tx = re_tx_collect(sc);
2084
2085         /* There are enough free TX descs */
2086         if (sc->re_ldata.re_tx_free > RE_TXDESC_SPARE)
2087                 ifp->if_flags &= ~IFF_OACTIVE;
2088
2089         /*
2090          * Some chips will ignore a second TX request issued while an
2091          * existing transmission is in progress. If the transmitter goes
2092          * idle but there are still packets waiting to be sent, we need
2093          * to restart the channel here to flush them out. This only seems
2094          * to be required with the PCIe devices.
2095          */
2096         if (sc->re_ldata.re_tx_free < sc->re_tx_desc_cnt)
2097                 CSR_WRITE_1(sc, sc->re_txstart, RE_TXSTART_START);
2098         else
2099                 ifp->if_timer = 0;
2100
2101         return tx;
2102 }
2103
2104 static void
2105 re_tick(void *xsc)
2106 {
2107         struct re_softc *sc = xsc;
2108
2109         lwkt_serialize_enter(sc->arpcom.ac_if.if_serializer);
2110         re_tick_serialized(xsc);
2111         lwkt_serialize_exit(sc->arpcom.ac_if.if_serializer);
2112 }
2113
2114 static void
2115 re_tick_serialized(void *xsc)
2116 {
2117         struct re_softc *sc = xsc;
2118         struct ifnet *ifp = &sc->arpcom.ac_if;
2119         struct mii_data *mii;
2120
2121         ASSERT_SERIALIZED(ifp->if_serializer);
2122
2123         mii = device_get_softc(sc->re_miibus);
2124         mii_tick(mii);
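        /*
         * Track link state; re_start() purges the send queue while the
         * link is down, so kick the transmitter as soon as the link
         * comes back up.
         */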
2125         if (sc->re_flags & RE_F_LINKED) {
2126                 if (!(mii->mii_media_status & IFM_ACTIVE))
2127                         sc->re_flags &= ~RE_F_LINKED;
2128         } else {
2129                 if (mii->mii_media_status & IFM_ACTIVE &&
2130                     IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
2131                         sc->re_flags |= RE_F_LINKED;
2132                         if (!ifq_is_empty(&ifp->if_snd))
2133                                 if_devstart(ifp);
2134                 }
2135         }
2136
2137         callout_reset(&sc->re_timer, hz, re_tick, sc);
2138 }
2139
2140 #ifdef DEVICE_POLLING
2141
2142 static void
2143 re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2144 {
2145         struct re_softc *sc = ifp->if_softc;
2146
2147         ASSERT_SERIALIZED(ifp->if_serializer);
2148
2149         switch(cmd) {
2150         case POLL_REGISTER:
2151                 /* disable interrupts */
2152                 re_setup_intr(sc, 0, RE_IMTYPE_NONE);
2153                 break;
2154
2155         case POLL_DEREGISTER:
2156                 /* enable interrupts */
2157                 re_setup_intr(sc, 1, sc->re_imtype);
2158                 break;
2159
2160         default:
2161                 sc->rxcycles = count;
2162                 re_rxeof(sc);
2163                 re_txeof(sc);
2164
2165                 if (!ifq_is_empty(&ifp->if_snd))
2166                         if_devstart(ifp);
2167
2168                 if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2169                         uint16_t       status;
2170
2171                         status = CSR_READ_2(sc, RE_ISR);
2172                         if (status == 0xffff)
2173                                 return;
2174                         if (status)
2175                                 CSR_WRITE_2(sc, RE_ISR, status);
2176
2177                         /*
2178                          * XXX check behaviour on receiver stalls.
2179                          */
2180
2181                         if (status & RE_ISR_SYSTEM_ERR)
2182                                 re_init(sc);
2183                 }
2184                 break;
2185         }
2186 }
2187 #endif /* DEVICE_POLLING */
2188
2189 static void
2190 re_intr(void *arg)
2191 {
2192         struct re_softc *sc = arg;
2193         struct ifnet *ifp = &sc->arpcom.ac_if;
2194         uint16_t status;
2195         int rx, tx;
2196
2197         ASSERT_SERIALIZED(ifp->if_serializer);
2198
2199         if ((sc->re_flags & RE_F_SUSPENDED) ||
2200             (ifp->if_flags & IFF_RUNNING) == 0)
2201                 return;
2202
2203         rx = tx = 0;
2204         for (;;) {
2205                 status = CSR_READ_2(sc, RE_ISR);
2206                 /* If the card has gone away the read returns 0xffff. */
2207                 if (status == 0xffff)
2208                         break;
2209                 if (status)
2210                         CSR_WRITE_2(sc, RE_ISR, status);
2211
2212                 if ((status & sc->re_intrs) == 0)
2213                         break;
2214
2215                 if (status & (sc->re_rx_ack | RE_ISR_RX_ERR))
2216                         rx |= re_rxeof(sc);
2217
2218                 if (status & (sc->re_tx_ack | RE_ISR_TX_ERR))
2219                         tx |= re_txeof(sc);
2220
2221                 if (status & RE_ISR_SYSTEM_ERR)
2222                         re_init(sc);
2223
2224                 if (status & RE_ISR_LINKCHG) {
2225                         callout_stop(&sc->re_timer);
2226                         re_tick_serialized(sc);
2227                 }
2228         }
2229
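        /*
         * Adaptive moderation: with simulated (timer based) interrupt
         * moderation configured, switch between the timer interrupt and
         * plain TX/RX interrupts depending on whether the last interrupt
         * actually found any work to do.
         */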
2230         if (sc->re_imtype == RE_IMTYPE_SIM) {
2231                 if ((sc->re_flags & RE_F_TIMER_INTR)) {
2232                         if ((tx | rx) == 0) {
2233                                 /*
2234                                  * Nothing needed processing; fall back to
2235                                  * plain TX/RX interrupts.
2236                                  */
2237                                 re_setup_intr(sc, 1, RE_IMTYPE_NONE);
2238
2239                                 /*
2240                                  * Re-collect, mainly to avoid a possible
2241                                  * race introduced by changing the interrupt
2242                                  * masks.
2243                                  */
2244                                 re_rxeof(sc);
2245                                 tx = re_txeof(sc);
2246                         } else {
2247                                 CSR_WRITE_4(sc, RE_TIMERCNT, 1); /* reload */
2248                         }
2249                 } else if (tx | rx) {
2250                         /*
2251                          * Assume that using simulated interrupt moderation
2252                          * (hardware timer based) could reduce the interrupt
2253                          * rate.
2254                          */
2255                         re_setup_intr(sc, 1, RE_IMTYPE_SIM);
2256                 }
2257         }
2258
2259         if (tx && !ifq_is_empty(&ifp->if_snd))
2260                 if_devstart(ifp);
2261 }
2262
2263 static int
2264 re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0)
2265 {
2266         struct mbuf *m = *m_head;
2267         bus_dma_segment_t segs[RE_MAXSEGS];
2268         bus_dmamap_t map;
2269         int error, maxsegs, idx, i, nsegs;
2270         struct re_desc *d, *tx_ring;
2271         uint32_t cmd_csum, ctl_csum, vlantag;
2272
2273         KASSERT(sc->re_ldata.re_tx_free > RE_TXDESC_SPARE,
2274                 ("not enough free TX desc\n"));
2275
2276         map = sc->re_ldata.re_tx_dmamap[*idx0];
2277
2278         /*
2279          * Set up checksum offload. Note: checksum offload bits must
2280          * appear in all descriptors of a multi-descriptor transmit
2281          * attempt. (This is according to testing done with an 8169
2282          * chip. I'm not sure if this is a requirement or a bug.)
2283          */
2284         cmd_csum = ctl_csum = 0;
2285         if (m->m_pkthdr.csum_flags & CSUM_IP) {
2286                 cmd_csum |= RE_TDESC_CMD_IPCSUM;
2287                 ctl_csum |= RE_TDESC_CTL_IPCSUM;
2288         }
2289         if (m->m_pkthdr.csum_flags & CSUM_TCP) {
2290                 cmd_csum |= RE_TDESC_CMD_TCPCSUM;
2291                 ctl_csum |= RE_TDESC_CTL_TCPCSUM;
2292         }
2293         if (m->m_pkthdr.csum_flags & CSUM_UDP) {
2294                 cmd_csum |= RE_TDESC_CMD_UDPCSUM;
2295                 ctl_csum |= RE_TDESC_CTL_UDPCSUM;
2296         }
2297
2298         /* For MAC2 chips, csum flags are set on re_control */
2299         if (sc->re_caps & RE_C_MAC2)
2300                 cmd_csum = 0;
2301         else
2302                 ctl_csum = 0;
2303
2304         if ((sc->re_caps & RE_C_AUTOPAD) == 0) {
2305                 /*
2306                  * With some of the RealTek chips, using the checksum offload
2307                  * support in conjunction with the autopadding feature results
2308                  * in the transmission of corrupt frames. For example, if we
2309                  * need to send a really small IP fragment that's less than 60
2310                  * bytes in size, and IP header checksumming is enabled, the
2311                  * resulting ethernet frame that appears on the wire will
2312                  * have garbled payload. To work around this, if TX checksum
2313                  * offload is enabled, we always manually pad short frames out
2314                  * to the minimum ethernet frame size.
2315                  *
2316                  * Note: this appears unnecessary for TCP, and doing it for TCP
2317                  * with PCIe adapters seems to result in bad checksums.
2318                  */
2319                 if ((m->m_pkthdr.csum_flags &
2320                      (CSUM_DELAY_IP | CSUM_DELAY_DATA)) &&
2321                     (m->m_pkthdr.csum_flags & CSUM_TCP) == 0 &&
2322                     m->m_pkthdr.len < RE_MIN_FRAMELEN) {
2323                         error = m_devpad(m, RE_MIN_FRAMELEN);
2324                         if (error)
2325                                 goto back;
2326                 }
2327         }
2328
2329         vlantag = 0;
2330         if (m->m_flags & M_VLANTAG) {
2331                 vlantag = htobe16(m->m_pkthdr.ether_vlantag) |
2332                           RE_TDESC_CTL_INSTAG;
2333         }
2334
2335         maxsegs = sc->re_ldata.re_tx_free;
2336         if (maxsegs > RE_MAXSEGS)
2337                 maxsegs = RE_MAXSEGS;
2338
2339         error = bus_dmamap_load_mbuf_defrag(sc->re_ldata.re_tx_mtag, map,
2340                         m_head, segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
2341         if (error)
2342                 goto back;
2343
2344         m = *m_head;
2345         bus_dmamap_sync(sc->re_ldata.re_tx_mtag, map, BUS_DMASYNC_PREWRITE);
2346
2347         /*
2348          * Map the segment array into descriptors.  We also keep track
2349          * of the end of the ring and set the end-of-ring bits as needed,
2350          * and we set the ownership bits in all except the very first
2351          * descriptor, whose ownership bits will be turned on later.
2352          */
2353         tx_ring = sc->re_ldata.re_tx_list;
2354         idx = *idx0;
2355         i = 0;
2356         for (;;) {
2357                 uint32_t cmdstat;
2358
2359                 d = &tx_ring[idx];
2360
2361                 cmdstat = segs[i].ds_len;
2362                 d->re_bufaddr_lo = htole32(RE_ADDR_LO(segs[i].ds_addr));
2363                 d->re_bufaddr_hi = htole32(RE_ADDR_HI(segs[i].ds_addr));
2364                 if (i == 0)
2365                         cmdstat |= RE_TDESC_CMD_SOF;
2366                 else
2367                         cmdstat |= RE_TDESC_CMD_OWN;
2368                 if (idx == (sc->re_tx_desc_cnt - 1))
2369                         cmdstat |= RE_TDESC_CMD_EOR;
2370                 d->re_cmdstat = htole32(cmdstat | cmd_csum);
2371                 d->re_control = htole32(ctl_csum | vlantag);
2372
2373                 i++;
2374                 if (i == nsegs)
2375                         break;
2376                 RE_TXDESC_INC(sc, idx);
2377         }
2378         d->re_cmdstat |= htole32(RE_TDESC_CMD_EOF);
2379
2380         /* Transfer ownership of packet to the chip. */
2381         d->re_cmdstat |= htole32(RE_TDESC_CMD_OWN);
2382         if (*idx0 != idx)
2383                 tx_ring[*idx0].re_cmdstat |= htole32(RE_TDESC_CMD_OWN);
2384
2385         /*
2386          * Ensure that the map for this transmission
2387          * is placed at the array index of the last descriptor
2388          * in this chain.
2389          */
2390         sc->re_ldata.re_tx_dmamap[*idx0] = sc->re_ldata.re_tx_dmamap[idx];
2391         sc->re_ldata.re_tx_dmamap[idx] = map;
2392
2393         sc->re_ldata.re_tx_mbuf[idx] = m;
2394         sc->re_ldata.re_tx_free -= nsegs;
2395
2396         RE_TXDESC_INC(sc, idx);
2397         *idx0 = idx;
2398 back:
2399         if (error) {
2400                 m_freem(*m_head);
2401                 *m_head = NULL;
2402         }
2403         return error;
2404 }
2405
2406 /*
2407  * Main transmit routine for C+ and gigE NICs.
2408  */
2409
2410 static void
2411 re_start(struct ifnet *ifp)
2412 {
2413         struct re_softc *sc = ifp->if_softc;
2414         struct mbuf *m_head;
2415         int idx, need_trans, oactive, error;
2416
2417         ASSERT_SERIALIZED(ifp->if_serializer);
2418
2419         if ((sc->re_flags & RE_F_LINKED) == 0) {
2420                 ifq_purge(&ifp->if_snd);
2421                 return;
2422         }
2423
2424         if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) != IFF_RUNNING)
2425                 return;
2426
2427         idx = sc->re_ldata.re_tx_prodidx;
2428
2429         need_trans = 0;
2430         oactive = 0;
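        /*
         * Queue frames until the ring runs short of descriptors.  Before
         * declaring the queue full, try one reclaim pass via
         * re_tx_collect(); if that frees nothing (or we already tried),
         * set IFF_OACTIVE and stop.
         */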
2431         while (sc->re_ldata.re_tx_mbuf[idx] == NULL) {
2432                 if (sc->re_ldata.re_tx_free <= RE_TXDESC_SPARE) {
2433                         if (!oactive) {
2434                                 if (re_tx_collect(sc)) {
2435                                         oactive = 1;
2436                                         continue;
2437                                 }
2438                         }
2439                         ifp->if_flags |= IFF_OACTIVE;
2440                         break;
2441                 }
2442
2443                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
2444                 if (m_head == NULL)
2445                         break;
2446
2447                 error = re_encap(sc, &m_head, &idx);
2448                 if (error) {
2449                         /* m_head is freed by re_encap(), if we reach here */
2450                         ifp->if_oerrors++;
2451
2452                         if (error == EFBIG && !oactive) {
2453                                 if (re_tx_collect(sc)) {
2454                                         oactive = 1;
2455                                         continue;
2456                                 }
2457                         }
2458                         ifp->if_flags |= IFF_OACTIVE;
2459                         break;
2460                 }
2461
2462                 oactive = 0;
2463                 need_trans = 1;
2464
2465                 /*
2466                  * If there's a BPF listener, bounce a copy of this frame
2467                  * to him.
2468                  */
2469                 ETHER_BPF_MTAP(ifp, m_head);
2470         }
2471
2472         if (!need_trans)
2473                 return;
2474
2475         sc->re_ldata.re_tx_prodidx = idx;
2476
2477         /*
2478          * RealTek put the TX poll request register in a different
2479          * location on the 8169 gigE chip. I don't know why.
2480          */
2481         CSR_WRITE_1(sc, sc->re_txstart, RE_TXSTART_START);
2482
2483         /*
2484          * Set a timeout in case the chip goes out to lunch.
2485          */
2486         ifp->if_timer = 5;
2487 }
2488
2489 static void
2490 re_init(void *xsc)
2491 {
2492         struct re_softc *sc = xsc;
2493         struct ifnet *ifp = &sc->arpcom.ac_if;
2494         struct mii_data *mii;
2495         int error, framelen;
2496
2497         ASSERT_SERIALIZED(ifp->if_serializer);
2498
2499         mii = device_get_softc(sc->re_miibus);
2500
2501         /*
2502          * Cancel pending I/O and free all RX/TX buffers.
2503          */
2504         re_stop(sc);
2505
2506         if (sc->re_caps & RE_C_CONTIGRX) {
2507                 if (ifp->if_mtu > ETHERMTU) {
2508                         KKASSERT(sc->re_ldata.re_jbuf != NULL);
2509                         sc->re_flags |= RE_F_USE_JPOOL;
2510                         sc->re_rxbuf_size = RE_FRAMELEN_MAX;
2511                         sc->re_newbuf = re_newbuf_jumbo;
2512                 } else {
2513                         sc->re_flags &= ~RE_F_USE_JPOOL;
2514                         sc->re_rxbuf_size = MCLBYTES;
2515                         sc->re_newbuf = re_newbuf_std;
2516                 }
2517         }
2518
2519         /*
2520          * Adjust the max read request size according to the MTU, mainly
2521          * to improve TX performance for the common case (ETHERMTU) on
2522          * GigE NICs.  However, this must _not_ be done on 10/100-only
2523          * NICs; their DMA engines malfunction with a non-default
2524          * max read request size.
2525          */
2526         if ((sc->re_caps & (RE_C_PCIE | RE_C_FASTE)) == RE_C_PCIE) {
2527                 if (ifp->if_mtu > ETHERMTU) {
2528                         /*
2529                          * 512 seems to be the only value that works
2530                          * reliably with jumbo frames.
2531                          */
2532                         pcie_set_max_readrq(sc->re_dev,
2533                                 PCIEM_DEVCTL_MAX_READRQ_512);
2534                 } else {
2535                         pcie_set_max_readrq(sc->re_dev,
2536                                 PCIEM_DEVCTL_MAX_READRQ_4096);
2537                 }
2538         }
2539
2540         /*
2541          * Enable C+ RX and TX mode, as well as VLAN stripping and
2542          * RX checksum offload. We must configure the C+ register
2543          * before all others.
2544          */
2545         CSR_WRITE_2(sc, RE_CPLUS_CMD, RE_CPLUSCMD_RXENB | RE_CPLUSCMD_TXENB |
2546                     RE_CPLUSCMD_PCI_MRW |
2547                     (ifp->if_capenable & IFCAP_VLAN_HWTAGGING ?
2548                      RE_CPLUSCMD_VLANSTRIP : 0) |
2549                     (ifp->if_capenable & IFCAP_RXCSUM ?
2550                      RE_CPLUSCMD_RXCSUM_ENB : 0));
2551
2552         /*
2553          * Init our MAC address.  Even though the chipset
2554          * documentation doesn't mention it, we need to enter "Config
2555          * register write enable" mode to modify the ID registers.
2556          */
2557         CSR_WRITE_1(sc, RE_EECMD, RE_EEMODE_WRITECFG);
2558         CSR_WRITE_4(sc, RE_IDR0,
2559             htole32(*(uint32_t *)(&sc->arpcom.ac_enaddr[0])));
2560         CSR_WRITE_2(sc, RE_IDR4,
2561             htole16(*(uint16_t *)(&sc->arpcom.ac_enaddr[4])));
2562         CSR_WRITE_1(sc, RE_EECMD, RE_EEMODE_OFF);
2563
2564         /*
2565          * For C+ mode, initialize the RX descriptors and mbufs.
2566          */
2567         error = re_rx_list_init(sc);
2568         if (error) {
2569                 re_stop(sc);
2570                 return;
2571         }
2572         error = re_tx_list_init(sc);
2573         if (error) {
2574                 re_stop(sc);
2575                 return;
2576         }
2577
2578         /*
2579          * Load the addresses of the RX and TX lists into the chip.
2580          */
2581         CSR_WRITE_4(sc, RE_RXLIST_ADDR_HI,
2582             RE_ADDR_HI(sc->re_ldata.re_rx_list_addr));
2583         CSR_WRITE_4(sc, RE_RXLIST_ADDR_LO,
2584             RE_ADDR_LO(sc->re_ldata.re_rx_list_addr));
2585
2586         CSR_WRITE_4(sc, RE_TXLIST_ADDR_HI,
2587             RE_ADDR_HI(sc->re_ldata.re_tx_list_addr));
2588         CSR_WRITE_4(sc, RE_TXLIST_ADDR_LO,
2589             RE_ADDR_LO(sc->re_ldata.re_tx_list_addr));
2590
2591         /*
2592          * Enable transmit and receive.
2593          */
2594         CSR_WRITE_1(sc, RE_COMMAND, RE_CMD_TX_ENB|RE_CMD_RX_ENB);
2595
2596         /*
2597          * Set the initial TX and RX configuration.
2598          */
2599         if (sc->re_flags & RE_F_TESTMODE) {
2600                 if (!RE_IS_8139CP(sc))
2601                         CSR_WRITE_4(sc, RE_TXCFG,
2602                                     RE_TXCFG_CONFIG | RE_LOOPTEST_ON);
2603                 else
2604                         CSR_WRITE_4(sc, RE_TXCFG,
2605                                     RE_TXCFG_CONFIG | RE_LOOPTEST_ON_CPLUS);
2606         } else
2607                 CSR_WRITE_4(sc, RE_TXCFG, RE_TXCFG_CONFIG);
2608
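        /*
         * The early TX threshold appears to be programmed in 128-byte
         * units (hence the howmany(..., 128) rounding); use at least one
         * standard cluster's worth.
         */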
2609         framelen = RE_FRAMELEN(ifp->if_mtu);
2610         if (framelen < MCLBYTES)
2611                 CSR_WRITE_1(sc, RE_EARLY_TX_THRESH, howmany(MCLBYTES, 128));
2612         else
2613                 CSR_WRITE_1(sc, RE_EARLY_TX_THRESH, howmany(framelen, 128));
2614
2615         CSR_WRITE_4(sc, RE_RXCFG, RE_RXCFG_CONFIG);
2616
2617         /*
2618          * Program the multicast filter, if necessary.
2619          */
2620         re_setmulti(sc);
2621
2622 #ifdef DEVICE_POLLING
2623         /*
2624          * Disable interrupts if we are polling.
2625          */
2626         if (ifp->if_flags & IFF_POLLING)
2627                 re_setup_intr(sc, 0, RE_IMTYPE_NONE);
2628         else    /* otherwise ... */
2629 #endif /* DEVICE_POLLING */
2630         /*
2631          * Enable interrupts.
2632          */
2633         if (sc->re_flags & RE_F_TESTMODE)
2634                 CSR_WRITE_2(sc, RE_IMR, 0);
2635         else
2636                 re_setup_intr(sc, 1, sc->re_imtype);
2637         CSR_WRITE_2(sc, RE_ISR, sc->re_intrs);
2638
2639         /* Start RX/TX process. */
2640         CSR_WRITE_4(sc, RE_MISSEDPKT, 0);
2641
2642 #ifdef notdef
2643         /* Enable receiver and transmitter. */
2644         CSR_WRITE_1(sc, RE_COMMAND, RE_CMD_TX_ENB|RE_CMD_RX_ENB);
2645 #endif
2646
2647         /*
2648          * For 8169 gigE NICs, set the max allowed RX packet
2649          * size so we can receive jumbo frames.
2650          */
2651         if (!RE_IS_8139CP(sc)) {
2652                 if (sc->re_caps & RE_C_CONTIGRX)
2653                         CSR_WRITE_2(sc, RE_MAXRXPKTLEN, sc->re_rxbuf_size);
2654                 else
2655                         CSR_WRITE_2(sc, RE_MAXRXPKTLEN, 16383);
2656         }
2657
2658         if (sc->re_flags & RE_F_TESTMODE)
2659                 return;
2660
2661         mii_mediachg(mii);
2662
2663         CSR_WRITE_1(sc, RE_CFG1, RE_CFG1_DRVLOAD|RE_CFG1_FULLDUPLEX);
2664
2665         ifp->if_flags |= IFF_RUNNING;
2666         ifp->if_flags &= ~IFF_OACTIVE;
2667
2668         callout_reset(&sc->re_timer, hz, re_tick, sc);
2669 }
2670
2671 /*
2672  * Set media options.
2673  */
2674 static int
2675 re_ifmedia_upd(struct ifnet *ifp)
2676 {
2677         struct re_softc *sc = ifp->if_softc;
2678         struct mii_data *mii;
2679
2680         ASSERT_SERIALIZED(ifp->if_serializer);
2681
2682         mii = device_get_softc(sc->re_miibus);
2683         mii_mediachg(mii);
2684
2685         return(0);
2686 }
2687
2688 /*
2689  * Report current media status.
2690  */
2691 static void
2692 re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2693 {
2694         struct re_softc *sc = ifp->if_softc;
2695         struct mii_data *mii;
2696
2697         ASSERT_SERIALIZED(ifp->if_serializer);
2698
2699         mii = device_get_softc(sc->re_miibus);
2700
2701         mii_pollstat(mii);
2702         ifmr->ifm_active = mii->mii_media_active;
2703         ifmr->ifm_status = mii->mii_media_status;
2704 }
2705
2706 static int
2707 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
2708 {
2709         struct re_softc *sc = ifp->if_softc;
2710         struct ifreq *ifr = (struct ifreq *) data;
2711         struct mii_data *mii;
2712         int error = 0, mask;
2713
2714         ASSERT_SERIALIZED(ifp->if_serializer);
2715
2716         switch(command) {
2717         case SIOCSIFMTU:
2718                 if (ifr->ifr_mtu > sc->re_maxmtu) {
2719                         error = EINVAL;
2720                 } else if (ifp->if_mtu != ifr->ifr_mtu) {
2721                         ifp->if_mtu = ifr->ifr_mtu;
2722                         if (ifp->if_flags & IFF_RUNNING)
2723                                 ifp->if_init(sc);
2724                 }
2725                 break;
2726
2727         case SIOCSIFFLAGS:
2728                 if (ifp->if_flags & IFF_UP) {
2729                         if (ifp->if_flags & IFF_RUNNING) {
2730                                 if ((ifp->if_flags ^ sc->re_if_flags) &
2731                                     (IFF_PROMISC | IFF_ALLMULTI))
2732                                         re_setmulti(sc);
2733                         } else {
2734                                 re_init(sc);
2735                         }
2736                 } else if (ifp->if_flags & IFF_RUNNING) {
2737                         re_stop(sc);
2738                 }
2739                 sc->re_if_flags = ifp->if_flags;
2740                 break;
2741
2742         case SIOCADDMULTI:
2743         case SIOCDELMULTI:
2744                 re_setmulti(sc);
2745                 break;
2746
2747         case SIOCGIFMEDIA:
2748         case SIOCSIFMEDIA:
2749                 mii = device_get_softc(sc->re_miibus);
2750                 error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2751                 break;
2752
2753         case SIOCSIFCAP:
2754                 mask = (ifr->ifr_reqcap ^ ifp->if_capenable) &
2755                        ifp->if_capabilities;
2756                 ifp->if_capenable ^= mask;
2757
2758                 if (mask & IFCAP_HWCSUM) {
2759                         if (ifp->if_capenable & IFCAP_TXCSUM)
2760                                 ifp->if_hwassist = RE_CSUM_FEATURES;
2761                         else
2762                                 ifp->if_hwassist = 0;
2763                 }
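                /*
                 * Any capability change on a running interface requires a
                 * full re_init(), since RX checksum and VLAN stripping are
                 * programmed through the C+ command register.
                 */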
2764                 if (mask && (ifp->if_flags & IFF_RUNNING))
2765                         re_init(sc);
2766                 break;
2767
2768         default:
2769                 error = ether_ioctl(ifp, command, data);
2770                 break;
2771         }
2772         return(error);
2773 }
2774
2775 static void
2776 re_watchdog(struct ifnet *ifp)
2777 {
2778         struct re_softc *sc = ifp->if_softc;
2779
2780         ASSERT_SERIALIZED(ifp->if_serializer);
2781
2782         if_printf(ifp, "watchdog timeout\n");
2783
2784         ifp->if_oerrors++;
2785
2786         re_txeof(sc);
2787         re_rxeof(sc);
2788
2789         re_init(sc);
2790
2791         if (!ifq_is_empty(&ifp->if_snd))
2792                 if_devstart(ifp);
2793 }
2794
2795 /*
2796  * Stop the adapter and free any mbufs allocated to the
2797  * RX and TX lists.
2798  */
2799 static void
2800 re_stop(struct re_softc *sc)
2801 {
2802         struct ifnet *ifp = &sc->arpcom.ac_if;
2803         int i;
2804
2805         ASSERT_SERIALIZED(ifp->if_serializer);
2806
2807         /* Reset the adapter. */
2808         re_reset(sc, ifp->if_flags & IFF_RUNNING);
2809
2810         ifp->if_timer = 0;
2811         callout_stop(&sc->re_timer);
2812
2813         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2814         sc->re_flags &= ~(RE_F_TIMER_INTR | RE_F_DROP_RXFRAG | RE_F_LINKED);
2815
2816         CSR_WRITE_1(sc, RE_COMMAND, 0x00);
2817         CSR_WRITE_2(sc, RE_IMR, 0x0000);
2818         CSR_WRITE_2(sc, RE_ISR, 0xFFFF);
2819
2820         re_free_rxchain(sc);
2821
2822         /* Free the TX list buffers. */
2823         for (i = 0; i < sc->re_tx_desc_cnt; i++) {
2824                 if (sc->re_ldata.re_tx_mbuf[i] != NULL) {
2825                         bus_dmamap_unload(sc->re_ldata.re_tx_mtag,
2826                                           sc->re_ldata.re_tx_dmamap[i]);
2827                         m_freem(sc->re_ldata.re_tx_mbuf[i]);
2828                         sc->re_ldata.re_tx_mbuf[i] = NULL;
2829                 }
2830         }
2831
2832         /* Free the RX list buffers. */
2833         for (i = 0; i < sc->re_rx_desc_cnt; i++) {
2834                 if (sc->re_ldata.re_rx_mbuf[i] != NULL) {
2835                         if ((sc->re_flags & RE_F_USE_JPOOL) == 0) {
2836                                 bus_dmamap_unload(sc->re_ldata.re_rx_mtag,
2837                                                   sc->re_ldata.re_rx_dmamap[i]);
2838                         }
2839                         m_freem(sc->re_ldata.re_rx_mbuf[i]);
2840                         sc->re_ldata.re_rx_mbuf[i] = NULL;
2841                 }
2842         }
2843 }
2844
2845 /*
2846  * Device suspend routine.  Stop the interface and save some PCI
2847  * settings in case the BIOS doesn't restore them properly on
2848  * resume.
2849  */
2850 static int
2851 re_suspend(device_t dev)
2852 {
2853 #ifndef BURN_BRIDGES
2854         int i;
2855 #endif
2856         struct re_softc *sc = device_get_softc(dev);
2857         struct ifnet *ifp = &sc->arpcom.ac_if;
2858
2859         lwkt_serialize_enter(ifp->if_serializer);
2860
2861         re_stop(sc);
2862
2863 #ifndef BURN_BRIDGES
2864         for (i = 0; i < 5; i++)
2865                 sc->saved_maps[i] = pci_read_config(dev, PCIR_MAPS + i * 4, 4);
2866         sc->saved_biosaddr = pci_read_config(dev, PCIR_BIOS, 4);
2867         sc->saved_intline = pci_read_config(dev, PCIR_INTLINE, 1);
2868         sc->saved_cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
2869         sc->saved_lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
2870 #endif
2871
2872         sc->re_flags |= RE_F_SUSPENDED;
2873
2874         lwkt_serialize_exit(ifp->if_serializer);
2875
2876         return (0);
2877 }
2878
2879 /*
2880  * Device resume routine.  Restore some PCI settings in case the BIOS
2881  * doesn't, re-enable busmastering, and restart the interface if
2882  * appropriate.
2883  */
2884 static int
2885 re_resume(device_t dev)
2886 {
2887         struct re_softc *sc = device_get_softc(dev);
2888         struct ifnet *ifp = &sc->arpcom.ac_if;
2889 #ifndef BURN_BRIDGES
2890         int i;
2891 #endif
2892
2893         lwkt_serialize_enter(ifp->if_serializer);
2894
2895 #ifndef BURN_BRIDGES
2896         /* better way to do this? */
2897         for (i = 0; i < 5; i++)
2898                 pci_write_config(dev, PCIR_MAPS + i * 4, sc->saved_maps[i], 4);
2899         pci_write_config(dev, PCIR_BIOS, sc->saved_biosaddr, 4);
2900         pci_write_config(dev, PCIR_INTLINE, sc->saved_intline, 1);
2901         pci_write_config(dev, PCIR_CACHELNSZ, sc->saved_cachelnsz, 1);
2902         pci_write_config(dev, PCIR_LATTIMER, sc->saved_lattimer, 1);
2903
2904         /* reenable busmastering */
2905         pci_enable_busmaster(dev);
2906         pci_enable_io(dev, SYS_RES_IOPORT);
2907 #endif
2908
2909         /* reinitialize interface if necessary */
2910         if (ifp->if_flags & IFF_UP)
2911                 re_init(sc);
2912
2913         sc->re_flags &= ~RE_F_SUSPENDED;
2914
2915         lwkt_serialize_exit(ifp->if_serializer);
2916
2917         return (0);
2918 }
2919
2920 /*
2921  * Stop all chip I/O so that the kernel's probe routines don't
2922  * get confused by errant DMAs when rebooting.
2923  */
2924 static void
2925 re_shutdown(device_t dev)
2926 {
2927         struct re_softc *sc = device_get_softc(dev);
2928         struct ifnet *ifp = &sc->arpcom.ac_if;
2929
2930         lwkt_serialize_enter(ifp->if_serializer);
2931         re_stop(sc);
2932         lwkt_serialize_exit(ifp->if_serializer);
2933 }
2934
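/*
 * Sysctl handlers for the hardware interrupt moderation RX/TX timers.
 * Both are thin wrappers around re_sysctl_hwtime(), which does the
 * actual validation and hardware update.
 */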
2935 static int
2936 re_sysctl_rxtime(SYSCTL_HANDLER_ARGS)
2937 {
2938         struct re_softc *sc = arg1;
2939
2940         return re_sysctl_hwtime(oidp, arg1, arg2, req, &sc->re_rx_time);
2941 }
2942
2943 static int
2944 re_sysctl_txtime(SYSCTL_HANDLER_ARGS)
2945 {
2946         struct re_softc *sc = arg1;
2947
2948         return re_sysctl_hwtime(oidp, arg1, arg2, req, &sc->re_tx_time);
2949 }
2950
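/*
 * Common handler for the RX/TX hardware moderation timers: accept only
 * positive values and, if the value changed while the interface is
 * running (and not polling) with hardware moderation active, reprogram
 * the moderation register via re_setup_hw_im().
 */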
2951 static int
2952 re_sysctl_hwtime(SYSCTL_HANDLER_ARGS, int *hwtime)
2953 {
2954         struct re_softc *sc = arg1;
2955         struct ifnet *ifp = &sc->arpcom.ac_if;
2956         int error, v;
2957
2958         lwkt_serialize_enter(ifp->if_serializer);
2959
2960         v = *hwtime;
2961         error = sysctl_handle_int(oidp, &v, 0, req);
2962         if (error || req->newptr == NULL)
2963                 goto back;
2964
2965         if (v <= 0) {
2966                 error = EINVAL;
2967                 goto back;
2968         }
2969
2970         if (v != *hwtime) {
2971                 *hwtime = v;
2972
2973                 if ((ifp->if_flags & (IFF_RUNNING | IFF_POLLING)) ==
2974                     IFF_RUNNING && sc->re_imtype == RE_IMTYPE_HW)
2975                         re_setup_hw_im(sc);
2976         }
2977 back:
2978         lwkt_serialize_exit(ifp->if_serializer);
2979         return error;
2980 }
2981
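/*
 * Handler for the simulated (timer driven) interrupt moderation period.
 * When the value changes on a running, non-polling interface that uses
 * RE_IMTYPE_SIM, the interrupt setup is torn down and re-armed instead
 * of poking the timer registers directly; see the disabled code below
 * for why.
 */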
2982 static int
2983 re_sysctl_simtime(SYSCTL_HANDLER_ARGS)
2984 {
2985         struct re_softc *sc = arg1;
2986         struct ifnet *ifp = &sc->arpcom.ac_if;
2987         int error, v;
2988
2989         lwkt_serialize_enter(ifp->if_serializer);
2990
2991         v = sc->re_sim_time;
2992         error = sysctl_handle_int(oidp, &v, 0, req);
2993         if (error || req->newptr == NULL)
2994                 goto back;
2995
2996         if (v <= 0) {
2997                 error = EINVAL;
2998                 goto back;
2999         }
3000
3001         if (v != sc->re_sim_time) {
3002                 sc->re_sim_time = v;
3003
3004                 if ((ifp->if_flags & (IFF_RUNNING | IFF_POLLING)) ==
3005                     IFF_RUNNING && sc->re_imtype == RE_IMTYPE_SIM) {
3006 #ifdef foo
3007                         int reg;
3008
3009                         /*
3010                          * Following code causes various strange
3011                          * performance problems.  Hmm ...
3012                          */
3013                         CSR_WRITE_2(sc, RE_IMR, 0);
3014                         if (!RE_IS_8139CP(sc))
3015                                 reg = RE_TIMERINT_8169;
3016                         else
3017                                 reg = RE_TIMERINT;
3018                         CSR_WRITE_4(sc, reg, 0);
3019                         CSR_READ_4(sc, reg); /* flush */
3020
3021                         CSR_WRITE_2(sc, RE_IMR, sc->re_intrs);
3022                         re_setup_sim_im(sc);
3023 #else
3024                         re_setup_intr(sc, 0, RE_IMTYPE_NONE);
3025                         DELAY(10);
3026                         re_setup_intr(sc, 1, RE_IMTYPE_SIM);
3027 #endif
3028                 }
3029         }
3030 back:
3031         lwkt_serialize_exit(ifp->if_serializer);
3032         return error;
3033 }
3034
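/*
 * Handler for the interrupt moderation type (hardware, simulated or
 * none).  Hardware moderation is rejected with EOPNOTSUPP on chips
 * that lack the RE_C_HWIM capability.
 */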
3035 static int
3036 re_sysctl_imtype(SYSCTL_HANDLER_ARGS)
3037 {
3038         struct re_softc *sc = arg1;
3039         struct ifnet *ifp = &sc->arpcom.ac_if;
3040         int error, v;
3041
3042         lwkt_serialize_enter(ifp->if_serializer);
3043
3044         v = sc->re_imtype;
3045         error = sysctl_handle_int(oidp, &v, 0, req);
3046         if (error || req->newptr == NULL)
3047                 goto back;
3048
3049         if (v != RE_IMTYPE_HW && v != RE_IMTYPE_SIM && v != RE_IMTYPE_NONE) {
3050                 error = EINVAL;
3051                 goto back;
3052         }
3053         if (v == RE_IMTYPE_HW && (sc->re_caps & RE_C_HWIM) == 0) {
3054                 /* Can't do hardware interrupt moderation */
3055                 error = EOPNOTSUPP;
3056                 goto back;
3057         }
3058
3059         if (v != sc->re_imtype) {
3060                 sc->re_imtype = v;
3061                 if ((ifp->if_flags & (IFF_RUNNING | IFF_POLLING)) ==
3062                     IFF_RUNNING)
3063                         re_setup_intr(sc, 1, sc->re_imtype);
3064         }
3065 back:
3066         lwkt_serialize_exit(ifp->if_serializer);
3067         return error;
3068 }
3069
3070 static void
3071 re_setup_hw_im(struct re_softc *sc)
3072 {
3073         KKASSERT(sc->re_caps & RE_C_HWIM);
3074
3075         /*
3076          * Interrupt moderation
3077          *
3078          * 0xABCD
3079          * A - unknown (maybe TX related)
3080          * B - TX timer (unit: 25us)
3081          * C - unknown (maybe RX related)
3082          * D - RX timer (unit: 25us)
3083          *
3084          *
3085          * re(4)'s interrupt moderation is actually controlled by
3086          * two variables, like most other NICs (bge, bce etc.)
3087          * o  timer
3088          * o  number of packets [P]
3089          *
3090          * The relationship between these two variables follows
3091          * the same pattern as on other NICs:
3092          * if (timer expires || packets > [P])
3093          *     an interrupt is delivered
3094          *
3095          * Currently we only know how to set the 'timer', not the
3096          * 'number of packets', which appears to be ~30 as far as
3097          * I have tested (sinking ~900Kpps, the interrupt rate is 30KHz)
3098          */
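        /*
         * Rough example, assuming re_rx_time/re_tx_time go into the
         * 25us-unit fields above unscaled: re_rx_time = 4 and
         * re_tx_time = 8 would yield roughly 100us RX and 200us TX
         * moderation windows.
         */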
3099         CSR_WRITE_2(sc, RE_IM,
3100                     RE_IM_RXTIME(sc->re_rx_time) |
3101                     RE_IM_TXTIME(sc->re_tx_time) |
3102                     RE_IM_MAGIC);
3103 }
3104
3105 static void
3106 re_disable_hw_im(struct re_softc *sc)
3107 {
3108         if (sc->re_caps & RE_C_HWIM)
3109                 CSR_WRITE_2(sc, RE_IM, 0);
3110 }
3111
3112 static void
3113 re_setup_sim_im(struct re_softc *sc)
3114 {
3115         if (!RE_IS_8139CP(sc)) {
3116                 uint32_t ticks;
3117
3118                 /*
3119                  * The datasheet says the tick decreases at the
3120                  * bus speed, but the clock seems to run a little
3121                  * bit faster, so we compensate for that here.
3122                  */
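                /*
                 * The 8/5 factor is that ~1.6x compensation; this
                 * assumes re_sim_time is in microseconds and
                 * re_bus_speed in MHz.
                 */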
3123                 ticks = (sc->re_sim_time * sc->re_bus_speed * 8) / 5;
3124                 CSR_WRITE_4(sc, RE_TIMERINT_8169, ticks);
3125         } else {
3126                 CSR_WRITE_4(sc, RE_TIMERINT, 0x400); /* XXX */
3127         }
3128         CSR_WRITE_4(sc, RE_TIMERCNT, 1); /* reload */
3129         sc->re_flags |= RE_F_TIMER_INTR;
3130 }
3131
3132 static void
3133 re_disable_sim_im(struct re_softc *sc)
3134 {
3135         if (!RE_IS_8139CP(sc))
3136                 CSR_WRITE_4(sc, RE_TIMERINT_8169, 0);
3137         else
3138                 CSR_WRITE_4(sc, RE_TIMERINT, 0);
3139         sc->re_flags &= ~RE_F_TIMER_INTR;
3140 }
3141
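/*
 * Select the interrupt mask and the RX/TX interrupt acknowledge bits
 * for the given moderation type.  With simulated moderation everything
 * is driven by the timeout interrupt; otherwise the normal RX/TX
 * status bits are used.
 */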
3142 static void
3143 re_config_imtype(struct re_softc *sc, int imtype)
3144 {
3145         switch (imtype) {
3146         case RE_IMTYPE_HW:
3147                 KKASSERT(sc->re_caps & RE_C_HWIM);
3148                 /* FALL THROUGH */
3149         case RE_IMTYPE_NONE:
3150                 sc->re_intrs = RE_INTRS;
3151                 sc->re_rx_ack = RE_ISR_RX_OK | RE_ISR_FIFO_OFLOW |
3152                                 RE_ISR_RX_OVERRUN;
3153                 sc->re_tx_ack = RE_ISR_TX_OK;
3154                 break;
3155
3156         case RE_IMTYPE_SIM:
3157                 sc->re_intrs = RE_INTRS_TIMER;
3158                 sc->re_rx_ack = RE_ISR_TIMEOUT_EXPIRED;
3159                 sc->re_tx_ack = RE_ISR_TIMEOUT_EXPIRED;
3160                 break;
3161
3162         default:
3163                 panic("%s: unknown imtype %d\n",
3164                       sc->arpcom.ac_if.if_xname, imtype);
3165         }
3166 }
3167
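/*
 * Apply an interrupt moderation type: program the interrupt mask
 * (optionally leaving interrupts disabled) and enable the selected
 * moderation mechanism while disabling the other one.
 */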
3168 static void
3169 re_setup_intr(struct re_softc *sc, int enable_intrs, int imtype)
3170 {
3171         re_config_imtype(sc, imtype);
3172
3173         if (enable_intrs)
3174                 CSR_WRITE_2(sc, RE_IMR, sc->re_intrs);
3175         else
3176                 CSR_WRITE_2(sc, RE_IMR, 0);
3177
3178         switch (imtype) {
3179         case RE_IMTYPE_NONE:
3180                 re_disable_sim_im(sc);
3181                 re_disable_hw_im(sc);
3182                 break;
3183
3184         case RE_IMTYPE_HW:
3185                 KKASSERT(sc->re_caps & RE_C_HWIM);
3186                 re_disable_sim_im(sc);
3187                 re_setup_hw_im(sc);
3188                 break;
3189
3190         case RE_IMTYPE_SIM:
3191                 re_disable_hw_im(sc);
3192                 re_setup_sim_im(sc);
3193                 break;
3194
3195         default:
3196                 panic("%s: unknown imtype %d\n",
3197                       sc->arpcom.ac_if.if_xname, imtype);
3198         }
3199 }
3200
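/*
 * Read the station address.  On certain MAC revisions it is fetched
 * from the EEPROM (if the first EEPROM word reads 0x8128); otherwise
 * it is read back from the IDRx registers.
 */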
3201 static void
3202 re_get_eaddr(struct re_softc *sc, uint8_t *eaddr)
3203 {
3204         int i;
3205
3206         if (sc->re_macver == RE_MACVER_11 ||
3207             sc->re_macver == RE_MACVER_12 ||
3208             sc->re_macver == RE_MACVER_30 ||
3209             sc->re_macver == RE_MACVER_31) {
3210                 uint16_t re_did;
3211
3212                 re_get_eewidth(sc);
3213                 re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
3214                 if (re_did == 0x8128) {
3215                         uint16_t as[ETHER_ADDR_LEN / 2];
3216                         int eaddr_off;
3217
3218                         if (sc->re_macver == RE_MACVER_30 ||
3219                             sc->re_macver == RE_MACVER_31)
3220                                 eaddr_off = RE_EE_EADDR1;
3221                         else
3222                                 eaddr_off = RE_EE_EADDR0;
3223
3224                         /*
3225                          * Get station address from the EEPROM.
3226                          */
3227                         re_read_eeprom(sc, (caddr_t)as, eaddr_off, 3);
3228                         for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
3229                                 as[i] = le16toh(as[i]);
3230                         bcopy(as, eaddr, ETHER_ADDR_LEN);
3231                         return;
3232                 }
3233         }
3234
3235         /*
3236          * Get station address from IDRx.
3237          */
3238         for (i = 0; i < ETHER_ADDR_LEN; ++i)
3239                 eaddr[i] = CSR_READ_1(sc, RE_IDR0 + i);
3240 }
3241
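/*
 * Allocate the jumbo receive buffer pool: a single coherent DMA
 * allocation carved into RE_JBUF_SIZE chunks, each tracked by a
 * re_jbuf entry on a free list.
 */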
3242 static int
3243 re_jpool_alloc(struct re_softc *sc)
3244 {
3245         struct re_list_data *ldata = &sc->re_ldata;
3246         struct re_jbuf *jbuf;
3247         bus_addr_t paddr;
3248         bus_size_t jpool_size;
3249         bus_dmamem_t dmem;
3250         caddr_t buf;
3251         int i, error;
3252
3253         lwkt_serialize_init(&ldata->re_jbuf_serializer);
3254
3255         ldata->re_jbuf = kmalloc(sizeof(struct re_jbuf) * RE_JBUF_COUNT(sc),
3256                                  M_DEVBUF, M_WAITOK | M_ZERO);
3257
3258         jpool_size = RE_JBUF_COUNT(sc) * RE_JBUF_SIZE;
3259
3260         error = bus_dmamem_coherent(sc->re_parent_tag,
3261                         RE_RXBUF_ALIGN, 0,
3262                         BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
3263                         jpool_size, BUS_DMA_WAITOK, &dmem);
3264         if (error) {
3265                 device_printf(sc->re_dev, "could not allocate jumbo memory\n");
3266                 return error;
3267         }
3268         ldata->re_jpool_tag = dmem.dmem_tag;
3269         ldata->re_jpool_map = dmem.dmem_map;
3270         ldata->re_jpool = dmem.dmem_addr;
3271         paddr = dmem.dmem_busaddr;
3272
3273         /* ... and split it into 9KB chunks */
3274         SLIST_INIT(&ldata->re_jbuf_free);
3275
3276         buf = ldata->re_jpool;
3277         for (i = 0; i < RE_JBUF_COUNT(sc); i++) {
3278                 jbuf = &ldata->re_jbuf[i];
3279
3280                 jbuf->re_sc = sc;
3281                 jbuf->re_inuse = 0;
3282                 jbuf->re_slot = i;
3283                 jbuf->re_buf = buf;
3284                 jbuf->re_paddr = paddr;
3285
3286                 SLIST_INSERT_HEAD(&ldata->re_jbuf_free, jbuf, re_link);
3287
3288                 buf += RE_JBUF_SIZE;
3289                 paddr += RE_JBUF_SIZE;
3290         }
3291         return 0;
3292 }
3293
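/*
 * Release the jumbo buffer pool DMA memory and the re_jbuf
 * bookkeeping array.
 */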
3294 static void
3295 re_jpool_free(struct re_softc *sc)
3296 {
3297         struct re_list_data *ldata = &sc->re_ldata;
3298
3299         if (ldata->re_jpool_tag != NULL) {
3300                 bus_dmamap_unload(ldata->re_jpool_tag, ldata->re_jpool_map);
3301                 bus_dmamem_free(ldata->re_jpool_tag, ldata->re_jpool,
3302                                 ldata->re_jpool_map);
3303                 bus_dma_tag_destroy(ldata->re_jpool_tag);
3304                 ldata->re_jpool_tag = NULL;
3305         }
3306
3307         if (ldata->re_jbuf != NULL) {
3308                 kfree(ldata->re_jbuf, M_DEVBUF);
3309                 ldata->re_jbuf = NULL;
3310         }
3311 }
3312
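/*
 * Take a jumbo buffer off the free list (protected by its own
 * serializer), or return NULL if the pool is exhausted.
 */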
3313 static struct re_jbuf *
3314 re_jbuf_alloc(struct re_softc *sc)
3315 {
3316         struct re_list_data *ldata = &sc->re_ldata;
3317         struct re_jbuf *jbuf;
3318
3319         lwkt_serialize_enter(&ldata->re_jbuf_serializer);
3320
3321         jbuf = SLIST_FIRST(&ldata->re_jbuf_free);
3322         if (jbuf != NULL) {
3323                 SLIST_REMOVE_HEAD(&ldata->re_jbuf_free, re_link);
3324                 jbuf->re_inuse = 1;
3325         }
3326
3327         lwkt_serialize_exit(&ldata->re_jbuf_serializer);
3328
3329         return jbuf;
3330 }
3331
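/*
 * Drop a reference on a jumbo buffer and return it to the free list
 * once the last reference is gone, after sanity checking that the
 * buffer really belongs to this pool and is still marked in use.
 */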
3332 static void
3333 re_jbuf_free(void *arg)
3334 {
3335         struct re_jbuf *jbuf = arg;
3336         struct re_softc *sc = jbuf->re_sc;
3337         struct re_list_data *ldata = &sc->re_ldata;
3338
3339         if (&ldata->re_jbuf[jbuf->re_slot] != jbuf) {
3340                 panic("%s: free wrong jumbo buffer\n",
3341                       sc->arpcom.ac_if.if_xname);
3342         } else if (jbuf->re_inuse == 0) {
3343                 panic("%s: jumbo buffer already freed\n",
3344                       sc->arpcom.ac_if.if_xname);
3345         }
3346
3347         lwkt_serialize_enter(&ldata->re_jbuf_serializer);
3348         atomic_subtract_int(&jbuf->re_inuse, 1);
3349         if (jbuf->re_inuse == 0)
3350                 SLIST_INSERT_HEAD(&ldata->re_jbuf_free, jbuf, re_link);
3351         lwkt_serialize_exit(&ldata->re_jbuf_serializer);
3352 }
3353
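/*
 * Add a reference to an in-use jumbo buffer, with the same sanity
 * checks as re_jbuf_free().
 */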
3354 static void
3355 re_jbuf_ref(void *arg)
3356 {
3357         struct re_jbuf *jbuf = arg;
3358         struct re_softc *sc = jbuf->re_sc;
3359         struct re_list_data *ldata = &sc->re_ldata;
3360
3361         if (&ldata->re_jbuf[jbuf->re_slot] != jbuf) {
3362                 panic("%s: ref wrong jumbo buffer\n",
3363                       sc->arpcom.ac_if.if_xname);
3364         } else if (jbuf->re_inuse == 0) {
3365                 panic("%s: jumbo buffer already freed\n",
3366                       sc->arpcom.ac_if.if_xname);
3367         }
3368         atomic_add_int(&jbuf->re_inuse, 1);
3369 }