ee4c44d582e23fdd7e64044c9f1fb75009ba1191
[dragonfly.git] / sys / dev / misc / ecc / ecc_e5.c
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/bitops.h>
41
42 #include <bus/pci/pcivar.h>
43 #include <bus/pci/pcireg.h>
44 #include <bus/pci/pcibus.h>
45 #include <bus/pci/pci_cfgreg.h>
46 #include <bus/pci/pcib_private.h>
47
48 #include "pcib_if.h"
49
50 #include <dev/misc/ecc/ecc_e5_reg.h>
51
/*
 * Read a config-space register of the UBOX0 function located on the
 * same PCI bus as 'dev'.  'w' is the access width handed through to
 * pcib_read_config().
 */
#define UBOX_READ(dev, ofs, w)					\
	pcib_read_config((dev), pci_get_bus((dev)),		\
	    PCISLOT_E5_UBOX0, PCIFUNC_E5_UBOX0, (ofs), (w))
/* Fixed-width (2 and 4) shorthands. */
#define UBOX_READ_2(dev, ofs)		UBOX_READ((dev), (ofs), 2)
#define UBOX_READ_4(dev, ofs)		UBOX_READ((dev), (ofs), 4)
57
/*
 * Read a config-space register of the IMC CPGC function located on the
 * same PCI bus as 'dev'.  'w' is the access width handed through to
 * pcib_read_config().
 */
#define IMC_CPGC_READ(dev, ofs, w)				\
	pcib_read_config((dev), pci_get_bus((dev)),		\
	    PCISLOT_E5_IMC_CPGC, PCIFUNC_E5_IMC_CPGC, (ofs), (w))
/* Fixed-width (2 and 4) shorthands. */
#define IMC_CPGC_READ_2(dev, ofs)	IMC_CPGC_READ((dev), (ofs), 2)
#define IMC_CPGC_READ_4(dev, ofs)	IMC_CPGC_READ((dev), (ofs), 4)
63
/*
 * Read a config-space register of the IMC CTAD function for channel 'c'
 * located on the same PCI bus as 'dev'.  The channel selects the PCI
 * function through PCIFUNC_E5_IMC_CTAD(); 'w' is the access width handed
 * through to pcib_read_config().
 */
#define IMC_CTAD_READ(dev, c, ofs, w)				\
	pcib_read_config((dev), pci_get_bus((dev)),		\
	    PCISLOT_E5_IMC_CTAD, PCIFUNC_E5_IMC_CTAD((c)), (ofs), (w))
/* Fixed-width (2 and 4) shorthands. */
#define IMC_CTAD_READ_2(dev, c, ofs)	IMC_CTAD_READ((dev), (c), (ofs), 2)
#define IMC_CTAD_READ_4(dev, c, ofs)	IMC_CTAD_READ((dev), (c), (ofs), 4)
69
/*
 * One entry of the probe match table.  ecc_e5_probe() compares did,
 * slot and func against the PCI device being probed; chan and desc are
 * used once an entry matches.
 */
struct ecc_e5_type {
	uint16_t	did;	/* PCI device id to match */
	int		slot;	/* PCI slot to match */
	int		func;	/* PCI function to match */
	int		chan;	/* memory channel this entry stands for */
	const char	*desc;	/* description prefix; NULL ends the table */
};
77
/*
 * Maps one channel-wide rank index onto the DIMM that owns it and the
 * rank's position inside that DIMM.
 */
struct ecc_e5_rank {
	int	rank_dimm;	/* DIMM that owns this rank */
	int	rank_dimm_rank;	/* index of the rank within that DIMM */
};
82
83 struct ecc_e5_softc {
84         device_t                ecc_dev;
85         int                     ecc_chan;
86         int                     ecc_node;
87         int                     ecc_rank_cnt;
88         struct ecc_e5_rank      ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX];
89         struct callout          ecc_callout;
90 };
91
/* device_printf() on the device recorded in the softc. */
#define ecc_printf(sc, fmt, ...) \
	device_printf((sc)->ecc_dev, fmt , ##__VA_ARGS__)
94
95 static int      ecc_e5_probe(device_t);
96 static int      ecc_e5_attach(device_t);
97 static int      ecc_e5_detach(device_t);
98 static void     ecc_e5_shutdown(device_t);
99
100 static void     ecc_e5_callout(void *);
101
102 #define ECC_E5_TYPE_V2(c) \
103 { \
104         .did    = PCI_E5_IMC_ERROR_CHN##c##_DID_ID, \
105         .slot   = PCISLOT_E5_IMC_ERROR, \
106         .func   = PCIFUNC_E5_IMC_ERROR_CHN##c, \
107         .chan   = c, \
108         .desc   = "Intel E5 v2 ECC" \
109 }
110
111 #define ECC_E5_TYPE_END         { 0, 0, 0, 0, NULL }
112
113 static const struct ecc_e5_type ecc_types[] = {
114         ECC_E5_TYPE_V2(0),
115         ECC_E5_TYPE_V2(1),
116         ECC_E5_TYPE_V2(2),
117         ECC_E5_TYPE_V2(3),
118
119         ECC_E5_TYPE_END
120 };
121
122 #undef ECC_E5_TYPE_V2
123 #undef ECC_E5_TYPE_END
124
125 static device_method_t ecc_e5_methods[] = {
126         /* Device interface */
127         DEVMETHOD(device_probe,         ecc_e5_probe),
128         DEVMETHOD(device_attach,        ecc_e5_attach),
129         DEVMETHOD(device_detach,        ecc_e5_detach),
130         DEVMETHOD(device_shutdown,      ecc_e5_shutdown),
131         DEVMETHOD(device_suspend,       bus_generic_suspend),
132         DEVMETHOD(device_resume,        bus_generic_resume),
133         DEVMETHOD_END
134 };
135
136 static driver_t ecc_e5_driver = {
137         "ecc",
138         ecc_e5_methods,
139         sizeof(struct ecc_e5_softc)
140 };
141 static devclass_t ecc_devclass;
142 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL);
143 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1);
144
145 static int
146 ecc_e5_probe(device_t dev)
147 {
148         const struct ecc_e5_type *t;
149         uint16_t vid, did;
150         int slot, func;
151
152         vid = pci_get_vendor(dev);
153         if (vid != PCI_E5_VID_ID)
154                 return ENXIO;
155
156         did = pci_get_device(dev);
157         slot = pci_get_slot(dev);
158         func = pci_get_function(dev);
159
160         for (t = ecc_types; t->desc != NULL; ++t) {
161                 if (t->did == did && t->slot == slot && t->func == func) {
162                         struct ecc_e5_softc *sc = device_get_softc(dev);
163                         char desc[32];
164                         uint32_t val;
165                         int node, dimm;
166
167                         /* Check CPGC vid/did */
168                         if (IMC_CPGC_READ_2(dev, PCIR_VENDOR) !=
169                             PCI_E5_VID_ID ||
170                             IMC_CPGC_READ_2(dev, PCIR_DEVICE) !=
171                             PCI_E5_IMC_CPGC_DID_ID)
172                                 break;
173
174                         /* Is this channel disabled */
175                         val = IMC_CPGC_READ_4(dev, PCI_E5_IMC_CPGC_MCMTR);
176                         if (val & PCI_E5_IMC_CPGC_MCMTR_CHN_DISABLE(t->chan))
177                                 break;
178
179                         /* Check CTAD vid/did */
180                         if (IMC_CTAD_READ_2(dev, t->chan, PCIR_VENDOR) !=
181                             PCI_E5_VID_ID ||
182                             IMC_CTAD_READ_2(dev, t->chan, PCIR_DEVICE) !=
183                             PCI_E5_IMC_CTAD_DID_ID(t->chan))
184                                 break;
185
186                         /* Are there any DIMMs populated? */
187                         for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
188                                 val = IMC_CTAD_READ_4(dev, t->chan,
189                                     PCI_E5_IMC_CTAD_DIMMMTR(dimm));
190                                 if (val & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP)
191                                         break;
192                         }
193                         if (dimm == PCI_E5_IMC_DIMM_MAX)
194                                 break;
195
196                         /* Check UBOX vid/did */
197                         if (UBOX_READ_2(dev, PCIR_VENDOR) != PCI_E5_VID_ID ||
198                             UBOX_READ_2(dev, PCIR_DEVICE) !=
199                             PCI_E5_UBOX0_DID_ID)
200                                 break;
201
202                         val = UBOX_READ_4(dev, PCI_E5_UBOX0_CPUNODEID);
203                         node = __SHIFTOUT(val,
204                             PCI_E5_UBOX0_CPUNODEID_LCLNODEID);
205
206                         ksnprintf(desc, sizeof(desc), "%s node%d channel%d",
207                             t->desc, node, t->chan);
208                         device_set_desc_copy(dev, desc);
209
210                         sc->ecc_chan = t->chan;
211                         sc->ecc_node = node;
212                         return 0;
213                 }
214         }
215         return ENXIO;
216 }
217
218 static int
219 ecc_e5_attach(device_t dev)
220 {
221         struct ecc_e5_softc *sc = device_get_softc(dev);
222         uint32_t mcmtr;
223         int dimm, rank;
224
225         callout_init_mp(&sc->ecc_callout);
226         sc->ecc_dev = dev;
227
228         mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, PCI_E5_IMC_CPGC_MCMTR);
229         if (bootverbose) {
230                 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) ==
231                     PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3)
232                         ecc_printf(sc, "native DDR3\n");
233         }
234
235         rank = 0;
236         for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
237                 const char *width;
238                 uint32_t dimmmtr;
239                 int rank_cnt, r;
240                 int density;
241                 int val;
242
243                 dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan,
244                     PCI_E5_IMC_CTAD_DIMMMTR(dimm));
245
246                 if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0)
247                         continue;
248
249                 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT);
250                 switch (val) {
251                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR:
252                         rank_cnt = 1;
253                         break;
254                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR:
255                         rank_cnt = 2;
256                         break;
257                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR:
258                         rank_cnt = 4;
259                         break;
260                 default:
261                         ecc_printf(sc, "unknown rank count 0x%x\n", val);
262                         return ENXIO;
263                 }
264
265                 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH);
266                 switch (val) {
267                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4:
268                         width = "x4";
269                         break;
270                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8:
271                         width = "x8";
272                         break;
273                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16:
274                         width = "x16";
275                         break;
276                 default:
277                         ecc_printf(sc, "unknown ddr3 width 0x%x\n", val);
278                         return ENXIO;
279                 }
280
281                 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY);
282                 switch (val) {
283                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G:
284                         density = 1;
285                         break;
286                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G:
287                         density = 2;
288                         break;
289                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G:
290                         density = 4;
291                         break;
292                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G:
293                         density = 8;
294                         break;
295                 default:
296                         ecc_printf(sc, "unknown ddr3 density 0x%x\n", val);
297                         return ENXIO;
298                 }
299
300                 if (bootverbose) {
301                         ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n",
302                             dimm, density * rank_cnt * 2,
303                             rank_cnt, width, density);
304                 }
305
306                 for (r = 0; r < rank_cnt; ++r) {
307                         struct ecc_e5_rank *rk;
308
309                         if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) {
310                                 ecc_printf(sc, "too many ranks\n");
311                                 return ENXIO;
312                         }
313                         rk = &sc->ecc_rank[rank];
314
315                         rk->rank_dimm = dimm;
316                         rk->rank_dimm_rank = r;
317
318                         ++rank;
319                 }
320         }
321         sc->ecc_rank_cnt = rank;
322
323         if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) {
324                 ecc_printf(sc, "ECC is not enabled\n");
325                 return 0;
326         }
327
328         if (bootverbose) {
329                 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) {
330                         const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
331                         uint32_t thr, mask;
332                         int ofs;
333
334                         ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2);
335                         if (rank & 1)
336                                 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
337                         else
338                                 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
339
340                         thr = pci_read_config(sc->ecc_dev, ofs, 4);
341                         ecc_printf(sc, "DIMM%d rank%d, "
342                             "corrected error threshold %d\n",
343                             rk->rank_dimm, rk->rank_dimm_rank,
344                             __SHIFTOUT(thr, mask));
345                 }
346         }
347
348         callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
349         return 0;
350 }
351
352 static void
353 ecc_e5_callout(void *xsc)
354 {
355         struct ecc_e5_softc *sc = xsc;
356         uint32_t err_ranks, val;
357
358         val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4);
359
360         err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS);
361         while (err_ranks != 0) {
362                 int rank;
363
364                 rank = ffs(err_ranks) - 1;
365                 err_ranks &= ~(1 << rank);
366
367                 if (rank < sc->ecc_rank_cnt) {
368                         const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
369                         uint32_t err, mask;
370                         int ofs;
371
372                         ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2);
373                         if (rank & 1)
374                                 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI;
375                         else
376                                 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO;
377
378                         err = pci_read_config(sc->ecc_dev, ofs, 4);
379                         ecc_printf(sc, "node%d channel%d DIMM%d rank%d, "
380                             "too many errors %d",
381                             sc->ecc_node, sc->ecc_chan,
382                             rk->rank_dimm, rk->rank_dimm_rank,
383                             __SHIFTOUT(err, mask));
384                 }
385         }
386
387         if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) {
388                 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT,
389                     val, 4);
390         }
391         callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
392 }
393
394 static void
395 ecc_e5_stop(device_t dev)
396 {
397         struct ecc_e5_softc *sc = device_get_softc(dev);
398
399         callout_stop_sync(&sc->ecc_callout);
400 }
401
402 static int
403 ecc_e5_detach(device_t dev)
404 {
405         ecc_e5_stop(dev);
406         return 0;
407 }
408
409 static void
410 ecc_e5_shutdown(device_t dev)
411 {
412         ecc_e5_stop(dev);
413 }