2 * Copyright (c) 2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bitops.h>
39 #include <sys/cpu_topology.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/queue.h>
43 #include <sys/sensors.h>
45 #include <bus/pci/pcivar.h>
46 #include <bus/pci/pcireg.h>
47 #include <bus/pci/pci_cfgreg.h>
48 #include <bus/pci/pcib_private.h>
52 #include <dev/misc/dimm/dimm.h>
53 #include <dev/misc/ecc/e5_imc_reg.h>
54 #include <dev/misc/ecc/e5_imc_var.h>
/*
 * Per-DIMM / per-rank / per-device state.
 *
 * NOTE(review): the struct headers and closing braces for
 * ecc_e5_dimm, ecc_e5_rank and ecc_e5_softc were dropped from this
 * extract (the embedded line numbers jump); only the field lines
 * below are visible and are kept verbatim.
 */
/* struct ecc_e5_dimm: list linkage, dimm(4) handle and ECC sensor. */
57 TAILQ_ENTRY(ecc_e5_dimm) dimm_link;
58 struct dimm_softc *dimm_softc;
59 struct ksensor dimm_sensor;
/* struct ecc_e5_rank: back-pointer to the DIMM owning this rank. */
63 struct ecc_e5_dimm *rank_dimm_sc;
/* struct ecc_e5_softc: matched channel descriptor, per-rank table,
 * polling sensor task and the list of discovered DIMMs. */
68 const struct e5_imc_chan *ecc_chan;
71 struct ecc_e5_rank ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX];
72 struct sensor_task *ecc_senstask;
73 TAILQ_HEAD(, ecc_e5_dimm) ecc_dimm;
/* Convenience printf prefixed with this device's name (via ecc_dev). */
76 #define ecc_printf(sc, fmt, arg...) \
77 device_printf((sc)->ecc_dev, fmt , ##arg)
/* Device-interface method implementations (wired up in ecc_e5_methods). */
79 static int ecc_e5_probe(device_t);
80 static int ecc_e5_attach(device_t);
81 static int ecc_e5_detach(device_t);
82 static void ecc_e5_shutdown(device_t);
/* Periodic ECC polling callback registered by attach. */
84 static void ecc_e5_sensor_task(void *);
/*
 * ECC_E5_CHAN builds one e5_imc_chan initializer from the E5 version,
 * IMC index and channel number; c_ext is the externally visible
 * channel number (used where IMC1 chan0/1 appear as channel2/3, see
 * the table below).
 * NOTE(review): the initializer's surrounding braces (and one or two
 * other lines) were dropped from this extract; the macro lines are
 * kept verbatim.  No comments are interleaved because of the
 * backslash continuations.
 */
86 #define ECC_E5_CHAN(v, imc, c, c_ext) \
88 .did = PCI_E5V##v##_IMC##imc##_ERROR_CHN##c##_DID_ID, \
89 .slot = PCISLOT_E5V##v##_IMC##imc##_ERROR_CHN##c, \
90 .func = PCIFUNC_E5V##v##_IMC##imc##_ERROR_CHN##c, \
91 .desc = "Intel E5 v" #v " ECC", \
93 E5_IMC_CHAN_FIELDS(v, imc, c, c_ext) \
96 #define ECC_E5_CHAN_V2(c) ECC_E5_CHAN(2, 0, c, c)
97 #define ECC_E5_CHAN_IMC0_V3(c) ECC_E5_CHAN(3, 0, c, c)
98 #define ECC_E5_CHAN_IMC1_V3(c, c_ext) ECC_E5_CHAN(3, 1, c, c_ext)
99 #define ECC_E5_CHAN_END E5_IMC_CHAN_END
/*
 * Match table scanned by ecc_e5_probe (terminated by a NULL desc
 * entry, per the probe loop's c->desc != NULL condition).
 * NOTE(review): the v2 entries, the ECC_E5_CHAN_END terminator, the
 * closing brace and part of the #undef block were dropped from this
 * extract; visible lines are kept verbatim.
 */
101 static const struct e5_imc_chan ecc_e5_chans[] = {
107 ECC_E5_CHAN_IMC0_V3(0),
108 ECC_E5_CHAN_IMC0_V3(1),
109 ECC_E5_CHAN_IMC0_V3(2),
110 ECC_E5_CHAN_IMC0_V3(3),
111 ECC_E5_CHAN_IMC1_V3(0, 2), /* IMC1 chan0 -> channel2 */
112 ECC_E5_CHAN_IMC1_V3(1, 3), /* IMC1 chan1 -> channel3 */
/* The construction macros are single-use; drop them after the table. */
117 #undef ECC_E5_CHAN_END
118 #undef ECC_E5_CHAN_V2
/*
 * newbus device-interface dispatch table.
 * NOTE(review): the DEVMETHOD_END terminator and closing brace are
 * not visible in this extract.
 */
121 static device_method_t ecc_e5_methods[] = {
122 /* Device interface */
123 DEVMETHOD(device_probe, ecc_e5_probe),
124 DEVMETHOD(device_attach, ecc_e5_attach),
125 DEVMETHOD(device_detach, ecc_e5_detach),
126 DEVMETHOD(device_shutdown, ecc_e5_shutdown),
127 DEVMETHOD(device_suspend, bus_generic_suspend),
128 DEVMETHOD(device_resume, bus_generic_resume),
/*
 * Driver registration: attach ecc_e5 to the pci bus; the module
 * depends on both the pci and dimm modules.
 * NOTE(review): the driver name/methods members of ecc_e5_driver and
 * its closing brace were dropped from this extract.
 */
132 static driver_t ecc_e5_driver = {
135 sizeof(struct ecc_e5_softc)
137 static devclass_t ecc_devclass;
138 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL);
139 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1);
140 MODULE_DEPEND(ecc_e5, dimm, 1, 1, 1);
141 MODULE_VERSION(ecc_e5, 1);
/*
 * ecc_e5_probe
 *
 * Device probe: require the Intel E5 IMC vendor ID, then look the
 * (device id, slot, function) triple up in ecc_e5_chans; on a hit,
 * run the IMC node probe and set the device description.
 *
 * NOTE(review): lines were dropped from this extract (the return
 * type, declarations of vid/did/slot/func/node, the failure returns
 * and the closing braces); visible code lines are kept verbatim and
 * this block does not compile as-is.
 */
144 ecc_e5_probe(device_t dev)
146 const struct e5_imc_chan *c;
/* Reject anything that is not an Intel E5 IMC function outright. */
150 vid = pci_get_vendor(dev);
151 if (vid != PCI_E5_IMC_VID_ID)
154 did = pci_get_device(dev);
155 slot = pci_get_slot(dev);
156 func = pci_get_function(dev);
/* ecc_e5_chans is terminated by a NULL desc; scan for an exact match. */
158 for (c = ecc_e5_chans; c->desc != NULL; ++c) {
159 if (c->did == did && c->slot == slot && c->func == func) {
160 struct ecc_e5_softc *sc = device_get_softc(dev);
163 node = e5_imc_node_probe(dev, c);
167 device_set_desc(dev, c->desc);
/*
 * ecc_e5_attach
 *
 * Device attach: discover populated DIMMs on this IMC channel, create
 * a dimm(4) instance and an ECC sensor for each, map hardware ranks
 * back to their owning DIMM, pick up the correctable-error thresholds
 * from PCI config space, and finally register the polling sensor task.
 *
 * NOTE(review): this extract is missing many lines (the embedded line
 * numbers jump): the function header, switch statements and their
 * case bodies, error paths and closing braces.  Visible code lines
 * are kept verbatim; this block does not compile as-is.
 */
178 ecc_e5_attach(device_t dev)
180 struct ecc_e5_softc *sc = device_get_softc(dev);
181 int dimm, rank, error, cpuid;
182 const cpu_node_t *node;
/* Empty DIMM list until the discovery loop below populates it. */
185 TAILQ_INIT(&sc->ecc_dimm);
/* Read MCMTR once; consulted for DDR4/DDR3 mode here and for the
 * ECC-enable bit further down. */
188 mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, sc->ecc_chan,
189 PCI_E5_IMC_CPGC_MCMTR);
191 if (sc->ecc_chan->ver == E5_IMC_CHAN_VER3 &&
192 (mcmtr & PCI_E5V3_IMC_CPGC_MCMTR_DDR4))
193 ecc_printf(sc, "DDR4\n");
194 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) ==
195 PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3) {
196 ecc_printf(sc, "native %s\n",
197 sc->ecc_chan->ver == E5_IMC_CHAN_VER2 ?
/* Walk every DIMM slot of this channel. */
203 for (dimm = 0; dimm < PCI_E5_IMC_CHN_DIMM_MAX; ++dimm) {
204 struct ecc_e5_dimm *dimm_sc;
205 struct ksensor *sens;
/* DIMMMTR describes the DIMM in this slot; skip unpopulated slots
 * (the skip statement itself is not visible in this extract). */
212 dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan,
213 PCI_E5_IMC_CTAD_DIMMMTR(dimm));
215 if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0)
/* Decode rank count (case bodies not visible; the 8R case is only
 * accepted on v3+ controllers per the version check below). */
218 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT);
220 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR:
223 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR:
226 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR:
229 case PCI_E5V3_IMC_CTAD_DIMMMTR_RANK_CNT_8R:
230 if (sc->ecc_chan->ver >= E5_IMC_CHAN_VER3) {
236 ecc_printf(sc, "unknown rank count 0x%x\n", val);
/* Decode DDR3 device width (x4/x8/x16; case bodies not visible). */
241 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH);
243 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4:
246 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8:
249 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16:
253 ecc_printf(sc, "unknown ddr3 width 0x%x\n", val);
/* Decode per-rank density; judging by the version check, the 1G
 * encoding is only valid before v3 (case bodies not visible). */
258 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY);
260 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G:
263 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G:
266 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G:
269 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G:
270 if (sc->ecc_chan->ver < E5_IMC_CHAN_VER3) {
276 ecc_printf(sc, "unknown ddr3 density 0x%x\n", val);
/* Per the format string, total GB = density * rank_cnt * 2. */
282 ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n",
283 dimm, density * rank_cnt * 2,
284 rank_cnt, width, density);
/* Create the dimm(4) instance and its ECC sensor for this DIMM,
 * then remember it for detach-time teardown. */
287 dimm_sc = kmalloc(sizeof(*dimm_sc), M_DEVBUF,
289 dimm_sc->dimm_softc =
290 dimm_create(sc->ecc_node, sc->ecc_chan->chan_ext, dimm);
292 sens = &dimm_sc->dimm_sensor;
293 ksnprintf(sens->desc, sizeof(sens->desc),
294 "node%d chan%d DIMM%d ecc",
295 sc->ecc_node, sc->ecc_chan->chan_ext, dimm);
296 sens->type = SENSOR_ECC;
297 sensor_set(sens, 0, SENSOR_S_OK);
298 dimm_sensor_attach(dimm_sc->dimm_softc, sens);
300 TAILQ_INSERT_TAIL(&sc->ecc_dimm, dimm_sc, dimm_link);
/* Record which DIMM owns each global rank index, bounded by the
 * hardware rank maximum. */
302 for (r = 0; r < rank_cnt; ++r) {
303 struct ecc_e5_rank *rk;
305 if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) {
306 ecc_printf(sc, "too many ranks\n");
311 rk = &sc->ecc_rank[rank];
312 rk->rank_dimm_sc = dimm_sc;
316 sc->ecc_rank_cnt = rank;
318 if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) {
319 ecc_printf(sc, "ECC is not enabled\n");
/* Each 32-bit threshold register covers two ranks: rank/2 selects
 * the register, the HI/LO field selects the half (the selecting
 * if/else is not visible here); hand the value to dimm(4). */
323 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) {
324 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
328 ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2);
330 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
332 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_LO;
334 thr = pci_read_config(sc->ecc_dev, ofs, 4);
335 dimm_set_ecc_thresh(rk->rank_dimm_sc->dimm_softc,
336 __SHIFTOUT(thr, mask));
/* Map this memory node to a CPU so the sensor task can be targeted;
 * presumably cpuid is passed to sensor_task_register2 on the dropped
 * continuation line — confirm against the full source. */
340 node = get_cpu_node_by_chipid(sc->ecc_node);
341 if (node != NULL && node->child_no > 0) {
342 cpuid = BSRCPUMASK(node->members);
344 device_printf(dev, "node%d chan%d -> cpu%d\n",
345 sc->ecc_node, sc->ecc_chan->chan_ext, cpuid);
348 sc->ecc_senstask = sensor_task_register2(sc, ecc_e5_sensor_task,
/*
 * ecc_e5_sensor_task
 *
 * Periodic poll (registered by attach via sensor_task_register2):
 * read the correctable-error status register, fold each flagged
 * rank's error count into its DIMM's ECC sensor, then write the
 * observed status bits back — presumably to acknowledge/clear them;
 * confirm against the datasheet.
 *
 * NOTE(review): lines were dropped from this extract (function
 * header, some local declarations, if/else bodies, closing braces);
 * visible code lines are kept verbatim.
 */
358 ecc_e5_sensor_task(void *xsc)
360 struct ecc_e5_softc *sc = xsc;
361 uint32_t err_ranks, val;
363 val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4);
/* One status bit per rank; consume the set bits lowest-first. */
365 err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS);
366 while (err_ranks != 0) {
369 rank = ffs(err_ranks) - 1;
370 err_ranks &= ~(1 << rank);
372 if (rank < sc->ecc_rank_cnt) {
373 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
374 struct ecc_e5_dimm *dimm_sc = rk->rank_dimm_sc;
/* Two ranks share each 32-bit counter register: rank/2 selects the
 * register, the HI/LO field the half (selection if/else dropped). */
378 ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2);
380 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI;
382 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO;
384 err = pci_read_config(sc->ecc_dev, ofs, 4);
385 ecc_cnt = __SHIFTOUT(err, mask);
387 dimm_sensor_ecc_set(dimm_sc->dimm_softc,
388 &dimm_sc->dimm_sensor, ecc_cnt, TRUE);
/* Presumably the else branch for a rank index outside ecc_rank_cnt —
 * confirm against the full source. */
390 ecc_printf(sc, "channel%d rank%d critical error\n",
391 sc->ecc_chan->chan_ext, rank);
395 if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) {
396 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT,
402 ecc_e5_stop(device_t dev)
404 struct ecc_e5_softc *sc = device_get_softc(dev);
406 if (sc->ecc_senstask != NULL) {
407 sensor_task_unregister2(sc->ecc_senstask);
408 sc->ecc_senstask = NULL;
413 ecc_e5_detach(device_t dev)
415 struct ecc_e5_softc *sc = device_get_softc(dev);
416 struct ecc_e5_dimm *dimm_sc;
420 while ((dimm_sc = TAILQ_FIRST(&sc->ecc_dimm)) != NULL) {
421 TAILQ_REMOVE(&sc->ecc_dimm, dimm_sc, dimm_link);
422 dimm_sensor_detach(dimm_sc->dimm_softc, &dimm_sc->dimm_sensor);
423 dimm_destroy(dimm_sc->dimm_softc);
425 kfree(dimm_sc, M_DEVBUF);
431 ecc_e5_shutdown(device_t dev)