ecc: Support Intel E5 v2 memory controller
[dragonfly.git] / sys / dev / misc / ecc / ecc_e5.c
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/bitops.h>
41
42 #include <bus/pci/pcivar.h>
43 #include <bus/pci/pcireg.h>
44 #include <bus/pci/pcibus.h>
45 #include <bus/pci/pci_cfgreg.h>
46 #include <bus/pci/pcib_private.h>
47
48 #include "pcib_if.h"
49
50 #include <dev/misc/ecc/ecc_e5_reg.h>
51
/*
 * Configuration-space accessors for the UBOX0 function located on the
 * same PCI bus as `dev'.  `ofs' is the register offset and `w' the
 * access width in bytes.
 */
#define UBOX_READ(dev, ofs, w) \
	pcib_read_config((dev), pci_get_bus((dev)), \
	    PCISLOT_E5_UBOX0, PCIFUNC_E5_UBOX0, (ofs), (w))

/* Fixed-width (16-bit / 32-bit) convenience wrappers. */
#define UBOX_READ_2(dev, ofs)	UBOX_READ((dev), (ofs), 2)
#define UBOX_READ_4(dev, ofs)	UBOX_READ((dev), (ofs), 4)
57
/*
 * Configuration-space accessors for the IMC CPGC function located on
 * the same PCI bus as `dev'.  `ofs' is the register offset and `w' the
 * access width in bytes.
 */
#define IMC_CPGC_READ(dev, ofs, w) \
	pcib_read_config((dev), pci_get_bus((dev)), \
	    PCISLOT_E5_IMC_CPGC, PCIFUNC_E5_IMC_CPGC, (ofs), (w))

/* Fixed-width (16-bit / 32-bit) convenience wrappers. */
#define IMC_CPGC_READ_2(dev, ofs)	IMC_CPGC_READ((dev), (ofs), 2)
#define IMC_CPGC_READ_4(dev, ofs)	IMC_CPGC_READ((dev), (ofs), 4)
63
/*
 * Configuration-space accessors for the per-channel IMC CTAD function
 * located on the same PCI bus as `dev'.  `c' selects the channel (it
 * picks the PCI function number), `ofs' is the register offset and `w'
 * the access width in bytes.
 */
#define IMC_CTAD_READ(dev, c, ofs, w) \
	pcib_read_config((dev), pci_get_bus((dev)), \
	    PCISLOT_E5_IMC_CTAD, PCIFUNC_E5_IMC_CTAD((c)), (ofs), (w))

/* Fixed-width (16-bit / 32-bit) convenience wrappers. */
#define IMC_CTAD_READ_2(dev, c, ofs) \
	IMC_CTAD_READ((dev), (c), (ofs), 2)
#define IMC_CTAD_READ_4(dev, c, ofs) \
	IMC_CTAD_READ((dev), (c), (ofs), 4)
69
/*
 * One entry of the table of supported devices.  Probe matches a PCI
 * device against `did', `slot' and `func'; a NULL `desc' terminates
 * the table.
 */
struct ecc_e5_type {
	uint16_t	did;	/* PCI device id */
	int		slot;	/* PCI slot number */
	int		func;	/* PCI function number */
	int		chan;	/* memory channel served by this function */
	const char	*desc;	/* description prefix; NULL ends the table */
};
77
/*
 * Maps a channel-wide rank index (the index into the softc's rank
 * array) back to the DIMM that owns it.
 */
struct ecc_e5_rank {
	int	rank_dimm;	/* index of the DIMM owning this rank */
	int	rank_dimm_rank;	/* index of this rank inside that DIMM */
};
82
83 struct ecc_e5_softc {
84         device_t                ecc_dev;
85         int                     ecc_chan;
86         int                     ecc_node;
87         int                     ecc_rank_cnt;
88         struct ecc_e5_rank      ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX];
89         struct callout          ecc_callout;
90 };
91
/* device_printf() on the device recorded in the softc. */
#define ecc_printf(sc, fmt, ...) \
	device_printf((sc)->ecc_dev, fmt , ##__VA_ARGS__)
94
95 static int      ecc_e5_probe(device_t);
96 static int      ecc_e5_attach(device_t);
97 static int      ecc_e5_detach(device_t);
98 static void     ecc_e5_shutdown(device_t);
99
100 static void     ecc_e5_callout(void *);
101
102 #define ECC_E5_TYPE_V2(c) \
103 { \
104         .did    = PCI_E5_IMC_ERROR_CHN##c##_DID_ID, \
105         .slot   = PCISLOT_E5_IMC_ERROR, \
106         .func   = PCIFUNC_E5_IMC_ERROR_CHN##c, \
107         .chan   = c, \
108         .desc   = "Intel E5 v2 ECC" \
109 }
110
111 #define ECC_E5_TYPE_END         { 0, 0, 0, 0, NULL }
112
113 static const struct ecc_e5_type ecc_types[] = {
114         ECC_E5_TYPE_V2(0),
115         ECC_E5_TYPE_V2(1),
116         ECC_E5_TYPE_V2(2),
117         ECC_E5_TYPE_V2(3),
118
119         ECC_E5_TYPE_END
120 };
121
122 #undef ECC_E5_TYPE_V2
123 #undef ECC_E5_TYPE_END
124
125 static device_method_t ecc_e5_methods[] = {
126         /* Device interface */
127         DEVMETHOD(device_probe,         ecc_e5_probe),
128         DEVMETHOD(device_attach,        ecc_e5_attach),
129         DEVMETHOD(device_detach,        ecc_e5_detach),
130         DEVMETHOD(device_shutdown,      ecc_e5_shutdown),
131         DEVMETHOD(device_suspend,       bus_generic_suspend),
132         DEVMETHOD(device_resume,        bus_generic_resume),
133         DEVMETHOD_END
134 };
135
136 static driver_t ecc_e5_driver = {
137         "ecc",
138         ecc_e5_methods,
139         sizeof(struct ecc_e5_softc)
140 };
141 static devclass_t ecc_devclass;
142 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL);
143 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1);
144
145 static int
146 ecc_e5_probe(device_t dev)
147 {
148         const struct ecc_e5_type *t;
149         uint16_t vid, did;
150         int slot, func;
151
152         vid = pci_get_vendor(dev);
153         if (vid != PCI_E5_VID_ID)
154                 return ENXIO;
155
156         did = pci_get_device(dev);
157         slot = pci_get_slot(dev);
158         func = pci_get_function(dev);
159
160         for (t = ecc_types; t->desc != NULL; ++t) {
161                 if (t->did == did && t->slot == slot && t->func == func) {
162                         struct ecc_e5_softc *sc = device_get_softc(dev);
163                         char desc[32];
164                         uint32_t val;
165                         int node, dimm;
166
167                         /* Check CPGC vid/did */
168                         if (IMC_CPGC_READ_2(dev, PCIR_VENDOR) !=
169                             PCI_E5_VID_ID ||
170                             IMC_CPGC_READ_2(dev, PCIR_DEVICE) !=
171                             PCI_E5_IMC_CPGC_DID_ID)
172                                 break;
173
174                         /* Is this channel disabled */
175                         val = IMC_CPGC_READ_4(dev, PCI_E5_IMC_CPGC_MCMTR);
176                         if (val & PCI_E5_IMC_CPGC_MCMTR_CHN_DISABLE(t->chan))
177                                 break;
178
179                         /* Check CTAD vid/did */
180                         if (IMC_CTAD_READ_2(dev, t->chan, PCIR_VENDOR) !=
181                             PCI_E5_VID_ID ||
182                             IMC_CTAD_READ_2(dev, t->chan, PCIR_DEVICE) !=
183                             PCI_E5_IMC_CTAD_DID_ID(t->chan))
184                                 break;
185
186                         /* Are there any DIMMs populated? */
187                         for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
188                                 val = IMC_CTAD_READ_4(dev, t->chan,
189                                     PCI_E5_IMC_CTAD_DIMMMTR(dimm));
190                                 if ((val & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) &&
191                                 (val & PCI_E5_IMC_CTAD_DIMMMTR_RANK_DISABLE_ALL)
192                                  != PCI_E5_IMC_CTAD_DIMMMTR_RANK_DISABLE_ALL)
193                                         break;
194                         }
195                         if (dimm == PCI_E5_IMC_DIMM_MAX)
196                                 break;
197
198                         /* Check UBOX vid/did */
199                         if (UBOX_READ_2(dev, PCIR_VENDOR) != PCI_E5_VID_ID ||
200                             UBOX_READ_2(dev, PCIR_DEVICE) !=
201                             PCI_E5_UBOX0_DID_ID)
202                                 break;
203
204                         val = UBOX_READ_4(dev, PCI_E5_UBOX0_CPUNODEID);
205                         node = __SHIFTOUT(val,
206                             PCI_E5_UBOX0_CPUNODEID_LCLNODEID);
207
208                         ksnprintf(desc, sizeof(desc), "%s node%d, channel%d",
209                             t->desc, node, t->chan);
210                         device_set_desc_copy(dev, desc);
211
212                         sc->ecc_chan = t->chan;
213                         sc->ecc_node = node;
214                         return 0;
215                 }
216         }
217         return ENXIO;
218 }
219
220 static int
221 ecc_e5_attach(device_t dev)
222 {
223         struct ecc_e5_softc *sc = device_get_softc(dev);
224         uint32_t mcmtr;
225         uint32_t dimmmtr[PCI_E5_IMC_DIMM_MAX];
226         int dimm, rank;
227
228         callout_init_mp(&sc->ecc_callout);
229         sc->ecc_dev = dev;
230
231         mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, PCI_E5_IMC_CPGC_MCMTR);
232         if (bootverbose) {
233                 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) ==
234                     PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3)
235                         ecc_printf(sc, "native DDR3\n");
236         }
237
238         rank = 0;
239         for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
240                 const char *width;
241                 int rank_cnt, r;
242                 int density;
243                 int val;
244
245                 dimmmtr[dimm] = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan,
246                     PCI_E5_IMC_CTAD_DIMMMTR(dimm));
247
248                 if ((dimmmtr[dimm] & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0)
249                         continue;
250
251                 val = __SHIFTOUT(dimmmtr[dimm],
252                     PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT);
253                 switch (val) {
254                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR:
255                         rank_cnt = 1;
256                         break;
257                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR:
258                         rank_cnt = 2;
259                         break;
260                 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR:
261                         rank_cnt = 4;
262                         break;
263                 default:
264                         ecc_printf(sc, "unknown rank count 0x%x\n", val);
265                         return ENXIO;
266                 }
267
268                 val = __SHIFTOUT(dimmmtr[dimm],
269                     PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH);
270                 switch (val) {
271                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4:
272                         width = "x4";
273                         break;
274                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8:
275                         width = "x8";
276                         break;
277                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16:
278                         width = "x16";
279                         break;
280                 default:
281                         ecc_printf(sc, "unknown ddr3 width 0x%x\n", val);
282                         return ENXIO;
283                 }
284
285                 val = __SHIFTOUT(dimmmtr[dimm],
286                     PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY);
287                 switch (val) {
288                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G:
289                         density = 1;
290                         break;
291                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G:
292                         density = 2;
293                         break;
294                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G:
295                         density = 4;
296                         break;
297                 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G:
298                         density = 8;
299                         break;
300                 default:
301                         ecc_printf(sc, "unknown ddr3 density 0x%x\n", val);
302                         return ENXIO;
303                 }
304
305                 if (bootverbose) {
306                         ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n",
307                             dimm, density * rank_cnt * 2,
308                             rank_cnt, width, density);
309                 }
310
311                 for (r = 0; r < rank_cnt; ++r) {
312                         struct ecc_e5_rank *rk;
313
314                         if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) {
315                                 ecc_printf(sc, "too many ranks\n");
316                                 return ENXIO;
317                         }
318                         rk = &sc->ecc_rank[rank];
319
320                         rk->rank_dimm = dimm;
321                         rk->rank_dimm_rank = r;
322
323                         ++rank;
324                 }
325         }
326         sc->ecc_rank_cnt = rank;
327
328         if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) {
329                 ecc_printf(sc, "ECC is not enabled\n");
330                 return 0;
331         }
332
333         if (bootverbose) {
334                 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) {
335                         const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
336                         uint32_t thr, mask;
337                         int ofs;
338
339                         ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2);
340                         if (rank & 1)
341                                 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
342                         else
343                                 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
344
345                         thr = pci_read_config(sc->ecc_dev, ofs, 4);
346                         ecc_printf(sc, "DIMM%d rank%d, "
347                             "corrected error threshold %d\n",
348                             rk->rank_dimm, rk->rank_dimm_rank,
349                             __SHIFTOUT(thr, mask));
350                 }
351         }
352
353         callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
354         return 0;
355 }
356
357 static void
358 ecc_e5_callout(void *xsc)
359 {
360         struct ecc_e5_softc *sc = xsc;
361         uint32_t err_ranks, val;
362
363         val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4);
364
365         err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS);
366         while (err_ranks != 0) {
367                 int rank;
368
369                 rank = ffs(err_ranks) - 1;
370                 err_ranks &= ~(1 << rank);
371
372                 if (rank < sc->ecc_rank_cnt) {
373                         const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
374                         uint32_t err, mask;
375                         int ofs;
376
377                         ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2);
378                         if (rank & 1)
379                                 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI;
380                         else
381                                 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO;
382
383                         err = pci_read_config(sc->ecc_dev, ofs, 4);
384                         ecc_printf(sc, "node%d channel%d DIMM%d rank%d, "
385                             "too many errors %d",
386                             sc->ecc_node, sc->ecc_chan,
387                             rk->rank_dimm, rk->rank_dimm_rank,
388                             __SHIFTOUT(err, mask));
389                 }
390         }
391
392         if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) {
393                 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT,
394                     val, 4);
395         }
396         callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
397 }
398
399 static void
400 ecc_e5_stop(device_t dev)
401 {
402         struct ecc_e5_softc *sc = device_get_softc(dev);
403
404         callout_stop_sync(&sc->ecc_callout);
405 }
406
407 static int
408 ecc_e5_detach(device_t dev)
409 {
410         ecc_e5_stop(dev);
411         return 0;
412 }
413
414 static void
415 ecc_e5_shutdown(device_t dev)
416 {
417         ecc_e5_stop(dev);
418 }