ecc/e5: No need to check rank disabled bits
[dragonfly.git] / sys / dev / misc / ecc / ecc_e5.c
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/bitops.h>
41
42 #include <bus/pci/pcivar.h>
43 #include <bus/pci/pcireg.h>
44 #include <bus/pci/pcibus.h>
45 #include <bus/pci/pci_cfgreg.h>
46 #include <bus/pci/pcib_private.h>
47
48 #include "pcib_if.h"
49
50 #include <dev/misc/ecc/ecc_e5_reg.h>
51
/*
 * Config-space read helpers for companion PCI functions located on the
 * same bus as `dev'.  Every helper goes through pcib_read_config() with a
 * fixed slot/function pair taken from the register header; the _2 and _4
 * variants pass an access width of 2 and 4 respectively.
 */

/* UBOX function. */
#define UBOX_READ(dev, reg, width)				\
	pcib_read_config((dev), pci_get_bus((dev)),		\
	    PCISLOT_E5_UBOX0, PCIFUNC_E5_UBOX0, (reg), (width))
#define UBOX_READ_2(dev, reg)		UBOX_READ((dev), (reg), 2)
#define UBOX_READ_4(dev, reg)		UBOX_READ((dev), (reg), 4)

/* IMC CPGC function. */
#define IMC_CPGC_READ(dev, reg, width)				\
	pcib_read_config((dev), pci_get_bus((dev)),		\
	    PCISLOT_E5_IMC_CPGC, PCIFUNC_E5_IMC_CPGC, (reg), (width))
#define IMC_CPGC_READ_2(dev, reg)	IMC_CPGC_READ((dev), (reg), 2)
#define IMC_CPGC_READ_4(dev, reg)	IMC_CPGC_READ((dev), (reg), 4)

/* IMC CTAD function; the PCI function number is derived from channel `ch'. */
#define IMC_CTAD_READ(dev, ch, reg, width)			\
	pcib_read_config((dev), pci_get_bus((dev)),		\
	    PCISLOT_E5_IMC_CTAD, PCIFUNC_E5_IMC_CTAD((ch)), (reg), (width))
#define IMC_CTAD_READ_2(dev, ch, reg)	IMC_CTAD_READ((dev), (ch), (reg), 2)
#define IMC_CTAD_READ_4(dev, ch, reg)	IMC_CTAD_READ((dev), (ch), (reg), 4)
69
/*
 * One supported PCI function.  ecc_e5_probe() matches a device against
 * did/slot/func; an entry whose desc is NULL terminates the table.
 */
struct ecc_e5_type {
	uint16_t	did;	/* PCI device id */
	int		slot;	/* PCI slot number */
	int		func;	/* PCI function number */
	int		chan;	/* memory channel, copied to the softc */
	const char	*desc;	/* description prefix; NULL ends the table */
};
77
/*
 * Location of one channel rank: which DIMM it belongs to and its index
 * inside that DIMM.  Filled in by ecc_e5_attach().
 */
struct ecc_e5_rank {
	int	rank_dimm;	/* owner dimm */
	int	rank_dimm_rank;	/* rank within the owner dimm */
};
82
83 struct ecc_e5_softc {
84         device_t                ecc_dev;
85         int                     ecc_chan;
86         int                     ecc_node;
87         int                     ecc_rank_cnt;
88         struct ecc_e5_rank      ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX];
89         struct callout          ecc_callout;
90 };
91
/* device_printf() wrapper that takes the softc instead of the device. */
#define ecc_printf(sc, fmt, ...) \
	device_printf((sc)->ecc_dev, fmt , ##__VA_ARGS__)
94
95 static int      ecc_e5_probe(device_t);
96 static int      ecc_e5_attach(device_t);
97 static int      ecc_e5_detach(device_t);
98 static void     ecc_e5_shutdown(device_t);
99
100 static void     ecc_e5_callout(void *);
101
102 #define ECC_E5_TYPE_V2(c) \
103 { \
104         .did    = PCI_E5_IMC_ERROR_CHN##c##_DID_ID, \
105         .slot   = PCISLOT_E5_IMC_ERROR, \
106         .func   = PCIFUNC_E5_IMC_ERROR_CHN##c, \
107         .chan   = c, \
108         .desc   = "Intel E5 v2 ECC" \
109 }
110
111 #define ECC_E5_TYPE_END         { 0, 0, 0, 0, NULL }
112
113 static const struct ecc_e5_type ecc_types[] = {
114         ECC_E5_TYPE_V2(0),
115         ECC_E5_TYPE_V2(1),
116         ECC_E5_TYPE_V2(2),
117         ECC_E5_TYPE_V2(3),
118
119         ECC_E5_TYPE_END
120 };
121
122 #undef ECC_E5_TYPE_V2
123 #undef ECC_E5_TYPE_END
124
125 static device_method_t ecc_e5_methods[] = {
126         /* Device interface */
127         DEVMETHOD(device_probe,         ecc_e5_probe),
128         DEVMETHOD(device_attach,        ecc_e5_attach),
129         DEVMETHOD(device_detach,        ecc_e5_detach),
130         DEVMETHOD(device_shutdown,      ecc_e5_shutdown),
131         DEVMETHOD(device_suspend,       bus_generic_suspend),
132         DEVMETHOD(device_resume,        bus_generic_resume),
133         DEVMETHOD_END
134 };
135
136 static driver_t ecc_e5_driver = {
137         "ecc",
138         ecc_e5_methods,
139         sizeof(struct ecc_e5_softc)
140 };
141 static devclass_t ecc_devclass;
142 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL);
143 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1);
144
145 static int
146 ecc_e5_probe(device_t dev)
147 {
148         const struct ecc_e5_type *t;
149         uint16_t vid, did;
150         int slot, func;
151
152         vid = pci_get_vendor(dev);
153         if (vid != PCI_E5_VID_ID)
154                 return ENXIO;
155
156         did = pci_get_device(dev);
157         slot = pci_get_slot(dev);
158         func = pci_get_function(dev);
159
160         for (t = ecc_types; t->desc != NULL; ++t) {
161                 if (t->did == did && t->slot == slot && t->func == func) {
162                         struct ecc_e5_softc *sc = device_get_softc(dev);
163                         char desc[32];
164                         uint32_t val;
165                         int node, dimm;
166
167                         /* Check CPGC vid/did */
168                         if (IMC_CPGC_READ_2(dev, PCIR_VENDOR) !=
169                             PCI_E5_VID_ID ||
170                             IMC_CPGC_READ_2(dev, PCIR_DEVICE) !=
171                             PCI_E5_IMC_CPGC_DID_ID)
172                                 break;
173
174                         /* Is this channel disabled */
175                         val = IMC_CPGC_READ_4(dev, PCI_E5_IMC_CPGC_MCMTR);
176                         if (val & PCI_E5_IMC_CPGC_MCMTR_CHN_DISABLE(t->chan))
177                                 break;
178
179                         /* Check CTAD vid/did */
180                         if (IMC_CTAD_READ_2(dev, t->chan, PCIR_VENDOR) !=
181                             PCI_E5_VID_ID ||
182                             IMC_CTAD_READ_2(dev, t->chan, PCIR_DEVICE) !=
183                             PCI_E5_IMC_CTAD_DID_ID(t->chan))
184                                 break;
185
186                         /* Are there any DIMMs populated? */
187                         for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
188                                 val = IMC_CTAD_READ_4(dev, t->chan,
189                                     PCI_E5_IMC_CTAD_DIMMMTR(dimm));
190                                 if (val & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP)
191                                         break;
192                         }
193                         if (dimm == PCI_E5_IMC_DIMM_MAX)
194                                 break;
195
196                         /* Check UBOX vid/did */
197                         if (UBOX_READ_2(dev, PCIR_VENDOR) != PCI_E5_VID_ID ||
198                             UBOX_READ_2(dev, PCIR_DEVICE) !=
199                             PCI_E5_UBOX0_DID_ID)
200                                 break;
201
202                         val = UBOX_READ_4(dev, PCI_E5_UBOX0_CPUNODEID);
203                         node = __SHIFTOUT(val,
204                             PCI_E5_UBOX0_CPUNODEID_LCLNODEID);
205
206                         ksnprintf(desc, sizeof(desc), "%s node%d, channel%d",
207                             t->desc, node, t->chan);
208                         device_set_desc_copy(dev, desc);
209
210                         sc->ecc_chan = t->chan;
211                         sc->ecc_node = node;
212                         return 0;
213                 }
214         }
215         return ENXIO;
216 }
217
218 static int
219 ecc_e5_attach(device_t dev)
220 {
221         struct ecc_e5_softc *sc = device_get_softc(dev);
222         uint32_t mcmtr;
223         uint32_t dimmmtr[PCI_E5_IMC_DIMM_MAX];
224         int dimm, rank;
225
226         callout_init_mp(&sc->ecc_callout);
227         sc->ecc_dev = dev;
228
229         mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, PCI_E5_IMC_CPGC_MCMTR);
230         if (bootverbose) {
231                 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) ==
232                     PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3)
233                         ecc_printf(sc, "native DDR3\n");
234         }
235
236         rank = 0;
237         for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
238                 const char *width;
239                 int rank_cnt, r;
240                 int density;
241                 int val;
242
243                 dimmmtr[dimm] = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan,
244                     PCI_E5_IMC_CTAD_DIMMMTR(dimm));
245
246                 if ((dimmmtr[dimm] & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0)
247                         continue;
248
249                 val = __SHIFTOUT(dimmmtr[dimm],
250                     PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT);
251                 switch (val) {
252                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR:
253                         rank_cnt = 1;
254                         break;
255                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR:
256                         rank_cnt = 2;
257                         break;
258                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR:
259                         rank_cnt = 4;
260                         break;
261                 default:
262                         ecc_printf(sc, "unknown rank count 0x%x\n", val);
263                         return ENXIO;
264                 }
265
266                 val = __SHIFTOUT(dimmmtr[dimm],
267                     PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH);
268                 switch (val) {
269                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4:
270                         width = "x4";
271                         break;
272                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8:
273                         width = "x8";
274                         break;
275                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16:
276                         width = "x16";
277                         break;
278                 default:
279                         ecc_printf(sc, "unknown ddr3 width 0x%x\n", val);
280                         return ENXIO;
281                 }
282
283                 val = __SHIFTOUT(dimmmtr[dimm],
284                     PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY);
285                 switch (val) {
286                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G:
287                         density = 1;
288                         break;
289                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G:
290                         density = 2;
291                         break;
292                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G:
293                         density = 4;
294                         break;
295                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G:
296                         density = 8;
297                         break;
298                 default:
299                         ecc_printf(sc, "unknown ddr3 density 0x%x\n", val);
300                         return ENXIO;
301                 }
302
303                 if (bootverbose) {
304                         ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n",
305                             dimm, density * rank_cnt * 2,
306                             rank_cnt, width, density);
307                 }
308
309                 for (r = 0; r < rank_cnt; ++r) {
310                         struct ecc_e5_rank *rk;
311
312                         if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) {
313                                 ecc_printf(sc, "too many ranks\n");
314                                 return ENXIO;
315                         }
316                         rk = &sc->ecc_rank[rank];
317
318                         rk->rank_dimm = dimm;
319                         rk->rank_dimm_rank = r;
320
321                         ++rank;
322                 }
323         }
324         sc->ecc_rank_cnt = rank;
325
326         if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) {
327                 ecc_printf(sc, "ECC is not enabled\n");
328                 return 0;
329         }
330
331         if (bootverbose) {
332                 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) {
333                         const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
334                         uint32_t thr, mask;
335                         int ofs;
336
337                         ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2);
338                         if (rank & 1)
339                                 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
340                         else
341                                 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
342
343                         thr = pci_read_config(sc->ecc_dev, ofs, 4);
344                         ecc_printf(sc, "DIMM%d rank%d, "
345                             "corrected error threshold %d\n",
346                             rk->rank_dimm, rk->rank_dimm_rank,
347                             __SHIFTOUT(thr, mask));
348                 }
349         }
350
351         callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
352         return 0;
353 }
354
355 static void
356 ecc_e5_callout(void *xsc)
357 {
358         struct ecc_e5_softc *sc = xsc;
359         uint32_t err_ranks, val;
360
361         val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4);
362
363         err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS);
364         while (err_ranks != 0) {
365                 int rank;
366
367                 rank = ffs(err_ranks) - 1;
368                 err_ranks &= ~(1 << rank);
369
370                 if (rank < sc->ecc_rank_cnt) {
371                         const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
372                         uint32_t err, mask;
373                         int ofs;
374
375                         ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2);
376                         if (rank & 1)
377                                 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI;
378                         else
379                                 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO;
380
381                         err = pci_read_config(sc->ecc_dev, ofs, 4);
382                         ecc_printf(sc, "node%d channel%d DIMM%d rank%d, "
383                             "too many errors %d",
384                             sc->ecc_node, sc->ecc_chan,
385                             rk->rank_dimm, rk->rank_dimm_rank,
386                             __SHIFTOUT(err, mask));
387                 }
388         }
389
390         if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) {
391                 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT,
392                     val, 4);
393         }
394         callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
395 }
396
397 static void
398 ecc_e5_stop(device_t dev)
399 {
400         struct ecc_e5_softc *sc = device_get_softc(dev);
401
402         callout_stop_sync(&sc->ecc_callout);
403 }
404
405 static int
406 ecc_e5_detach(device_t dev)
407 {
408         ecc_e5_stop(dev);
409         return 0;
410 }
411
412 static void
413 ecc_e5_shutdown(device_t dev)
414 {
415         ecc_e5_stop(dev);
416 }