/*
 * Update drm/radeon to Linux 4.7.10 as much as possible...
 * dragonfly.git: sys/dev/drm/radeon/cik.c
 */
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "radeon_audio.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45
46 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
47 MODULE_FIRMWARE("radeon/bonaire_me.bin");
48 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
53 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
54
55 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
64
65 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
66 MODULE_FIRMWARE("radeon/hawaii_me.bin");
67 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
72 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
73
74 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
80
81 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
82 MODULE_FIRMWARE("radeon/kaveri_me.bin");
83 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
86 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
87 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
88
89 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
90 MODULE_FIRMWARE("radeon/KABINI_me.bin");
91 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
92 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
93 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
94 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
95
96 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
97 MODULE_FIRMWARE("radeon/kabini_me.bin");
98 MODULE_FIRMWARE("radeon/kabini_ce.bin");
99 MODULE_FIRMWARE("radeon/kabini_mec.bin");
100 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
101 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
102
103 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
109
110 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
111 MODULE_FIRMWARE("radeon/mullins_me.bin");
112 MODULE_FIRMWARE("radeon/mullins_ce.bin");
113 MODULE_FIRMWARE("radeon/mullins_mec.bin");
114 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
115 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
116
117 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
118 static void cik_rlc_stop(struct radeon_device *rdev);
119 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
120 static void cik_program_aspm(struct radeon_device *rdev);
121 static void cik_init_pg(struct radeon_device *rdev);
122 static void cik_init_cg(struct radeon_device *rdev);
123 static void cik_fini_pg(struct radeon_device *rdev);
124 static void cik_fini_cg(struct radeon_device *rdev);
125 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
126                                           bool enable);
127 /**
128  * cik_get_allowed_info_register - fetch the register for the info ioctl
129  *
130  * @rdev: radeon_device pointer
131  * @reg: register offset in bytes
132  * @val: register value
133  *
134  * Returns 0 for success or -EINVAL for an invalid register
135  *
136  */
137 int cik_get_allowed_info_register(struct radeon_device *rdev,
138                                   u32 reg, u32 *val)
139 {
140         switch (reg) {
141         case GRBM_STATUS:
142         case GRBM_STATUS2:
143         case GRBM_STATUS_SE0:
144         case GRBM_STATUS_SE1:
145         case GRBM_STATUS_SE2:
146         case GRBM_STATUS_SE3:
147         case SRBM_STATUS:
148         case SRBM_STATUS2:
149         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
150         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
151         case UVD_STATUS:
152         /* TODO VCE */
153                 *val = RREG32(reg);
154                 return 0;
155         default:
156                 return -EINVAL;
157         }
158 }
159
160 /*
161  * Indirect registers accessor
162  */
163 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
164 {
165         unsigned long flags;
166         u32 r;
167
168         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
169         WREG32(CIK_DIDT_IND_INDEX, (reg));
170         r = RREG32(CIK_DIDT_IND_DATA);
171         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
172         return r;
173 }
174
175 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
176 {
177         unsigned long flags;
178
179         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
180         WREG32(CIK_DIDT_IND_INDEX, (reg));
181         WREG32(CIK_DIDT_IND_DATA, (v));
182         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
183 }
184
185 /* get temperature in millidegrees */
186 int ci_get_temp(struct radeon_device *rdev)
187 {
188         u32 temp;
189         int actual_temp = 0;
190
191         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
192                 CTF_TEMP_SHIFT;
193
194         if (temp & 0x200)
195                 actual_temp = 255;
196         else
197                 actual_temp = temp & 0x1ff;
198
199         actual_temp = actual_temp * 1000;
200
201         return actual_temp;
202 }
203
204 /* get temperature in millidegrees */
205 int kv_get_temp(struct radeon_device *rdev)
206 {
207         u32 temp;
208         int actual_temp = 0;
209
210         temp = RREG32_SMC(0xC0300E0C);
211
212         if (temp)
213                 actual_temp = (temp / 8) - 49;
214         else
215                 actual_temp = 0;
216
217         actual_temp = actual_temp * 1000;
218
219         return actual_temp;
220 }
221
222 /*
223  * Indirect registers accessor
224  */
225 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
226 {
227         unsigned long flags;
228         u32 r;
229
230         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
231         WREG32(PCIE_INDEX, reg);
232         (void)RREG32(PCIE_INDEX);
233         r = RREG32(PCIE_DATA);
234         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
235         return r;
236 }
237
238 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
239 {
240         unsigned long flags;
241
242         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
243         WREG32(PCIE_INDEX, reg);
244         (void)RREG32(PCIE_INDEX);
245         WREG32(PCIE_DATA, v);
246         (void)RREG32(PCIE_DATA);
247         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
248 }
249
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * Entries are pairs: a selector word
 *   (instance/broadcast select << 16) | (register dword offset),
 * followed by a placeholder value (0x00000000).
 * NOTE(review): the bare small constants below (0x3, 0x5) appear to be
 * count markers introducing the sections that follow — the final 0x5
 * section has five selector words with no placeholder values. Inferred
 * from the table layout; confirm against the RLC microcode interface.
 * Do not reorder — the RLC ucode consumes this table as-is.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
696
697 static const u32 kalindi_rlc_save_restore_register_list[] =
698 {
699         (0x0e00 << 16) | (0xc12c >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0xc140 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xc150 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0xc15c >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0xc168 >> 2),
708         0x00000000,
709         (0x0e00 << 16) | (0xc170 >> 2),
710         0x00000000,
711         (0x0e00 << 16) | (0xc204 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0xc2b4 >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0xc2b8 >> 2),
716         0x00000000,
717         (0x0e00 << 16) | (0xc2bc >> 2),
718         0x00000000,
719         (0x0e00 << 16) | (0xc2c0 >> 2),
720         0x00000000,
721         (0x0e00 << 16) | (0x8228 >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0x829c >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0x869c >> 2),
726         0x00000000,
727         (0x0600 << 16) | (0x98f4 >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0x98f8 >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0x9900 >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0xc260 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0x90e8 >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0x3c000 >> 2),
738         0x00000000,
739         (0x0e00 << 16) | (0x3c00c >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0x8c1c >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0x9700 >> 2),
744         0x00000000,
745         (0x0e00 << 16) | (0xcd20 >> 2),
746         0x00000000,
747         (0x4e00 << 16) | (0xcd20 >> 2),
748         0x00000000,
749         (0x5e00 << 16) | (0xcd20 >> 2),
750         0x00000000,
751         (0x6e00 << 16) | (0xcd20 >> 2),
752         0x00000000,
753         (0x7e00 << 16) | (0xcd20 >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0x89bc >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x8900 >> 2),
758         0x00000000,
759         0x3,
760         (0x0e00 << 16) | (0xc130 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xc134 >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xc1fc >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xc208 >> 2),
767         0x00000000,
768         (0x0e00 << 16) | (0xc264 >> 2),
769         0x00000000,
770         (0x0e00 << 16) | (0xc268 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0xc26c >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0xc270 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0xc274 >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0xc28c >> 2),
779         0x00000000,
780         (0x0e00 << 16) | (0xc290 >> 2),
781         0x00000000,
782         (0x0e00 << 16) | (0xc294 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0xc298 >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0xc2a0 >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0xc2a4 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0xc2a8 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0xc2ac >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0x301d0 >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0x30238 >> 2),
797         0x00000000,
798         (0x0e00 << 16) | (0x30250 >> 2),
799         0x00000000,
800         (0x0e00 << 16) | (0x30254 >> 2),
801         0x00000000,
802         (0x0e00 << 16) | (0x30258 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0x3025c >> 2),
805         0x00000000,
806         (0x4e00 << 16) | (0xc900 >> 2),
807         0x00000000,
808         (0x5e00 << 16) | (0xc900 >> 2),
809         0x00000000,
810         (0x6e00 << 16) | (0xc900 >> 2),
811         0x00000000,
812         (0x7e00 << 16) | (0xc900 >> 2),
813         0x00000000,
814         (0x4e00 << 16) | (0xc904 >> 2),
815         0x00000000,
816         (0x5e00 << 16) | (0xc904 >> 2),
817         0x00000000,
818         (0x6e00 << 16) | (0xc904 >> 2),
819         0x00000000,
820         (0x7e00 << 16) | (0xc904 >> 2),
821         0x00000000,
822         (0x4e00 << 16) | (0xc908 >> 2),
823         0x00000000,
824         (0x5e00 << 16) | (0xc908 >> 2),
825         0x00000000,
826         (0x6e00 << 16) | (0xc908 >> 2),
827         0x00000000,
828         (0x7e00 << 16) | (0xc908 >> 2),
829         0x00000000,
830         (0x4e00 << 16) | (0xc90c >> 2),
831         0x00000000,
832         (0x5e00 << 16) | (0xc90c >> 2),
833         0x00000000,
834         (0x6e00 << 16) | (0xc90c >> 2),
835         0x00000000,
836         (0x7e00 << 16) | (0xc90c >> 2),
837         0x00000000,
838         (0x4e00 << 16) | (0xc910 >> 2),
839         0x00000000,
840         (0x5e00 << 16) | (0xc910 >> 2),
841         0x00000000,
842         (0x6e00 << 16) | (0xc910 >> 2),
843         0x00000000,
844         (0x7e00 << 16) | (0xc910 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xc99c >> 2),
847         0x00000000,
848         (0x0e00 << 16) | (0x9834 >> 2),
849         0x00000000,
850         (0x0000 << 16) | (0x30f00 >> 2),
851         0x00000000,
852         (0x0000 << 16) | (0x30f04 >> 2),
853         0x00000000,
854         (0x0000 << 16) | (0x30f08 >> 2),
855         0x00000000,
856         (0x0000 << 16) | (0x30f0c >> 2),
857         0x00000000,
858         (0x0600 << 16) | (0x9b7c >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8a14 >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8a18 >> 2),
863         0x00000000,
864         (0x0600 << 16) | (0x30a00 >> 2),
865         0x00000000,
866         (0x0e00 << 16) | (0x8bf0 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0x8bcc >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0x8b24 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x30a04 >> 2),
873         0x00000000,
874         (0x0600 << 16) | (0x30a10 >> 2),
875         0x00000000,
876         (0x0600 << 16) | (0x30a14 >> 2),
877         0x00000000,
878         (0x0600 << 16) | (0x30a18 >> 2),
879         0x00000000,
880         (0x0600 << 16) | (0x30a2c >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0xc700 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0xc704 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0xc708 >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0xc768 >> 2),
889         0x00000000,
890         (0x0400 << 16) | (0xc770 >> 2),
891         0x00000000,
892         (0x0400 << 16) | (0xc774 >> 2),
893         0x00000000,
894         (0x0400 << 16) | (0xc798 >> 2),
895         0x00000000,
896         (0x0400 << 16) | (0xc79c >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0x9100 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0x3c010 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0x8c00 >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0x8c04 >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x8c20 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0x8c38 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x8c3c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0xae00 >> 2),
913         0x00000000,
914         (0x0e00 << 16) | (0x9604 >> 2),
915         0x00000000,
916         (0x0e00 << 16) | (0xac08 >> 2),
917         0x00000000,
918         (0x0e00 << 16) | (0xac0c >> 2),
919         0x00000000,
920         (0x0e00 << 16) | (0xac10 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xac14 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xac58 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0xac68 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0xac6c >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0xac70 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0xac74 >> 2),
933         0x00000000,
934         (0x0e00 << 16) | (0xac78 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0xac7c >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0xac80 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0xac84 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0xac88 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0xac8c >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x970c >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0x9714 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0x9718 >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0x971c >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0x31068 >> 2),
955         0x00000000,
956         (0x4e00 << 16) | (0x31068 >> 2),
957         0x00000000,
958         (0x5e00 << 16) | (0x31068 >> 2),
959         0x00000000,
960         (0x6e00 << 16) | (0x31068 >> 2),
961         0x00000000,
962         (0x7e00 << 16) | (0x31068 >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0xcd10 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0xcd14 >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x88b0 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x88b4 >> 2),
971         0x00000000,
972         (0x0e00 << 16) | (0x88b8 >> 2),
973         0x00000000,
974         (0x0e00 << 16) | (0x88bc >> 2),
975         0x00000000,
976         (0x0400 << 16) | (0x89c0 >> 2),
977         0x00000000,
978         (0x0e00 << 16) | (0x88c4 >> 2),
979         0x00000000,
980         (0x0e00 << 16) | (0x88c8 >> 2),
981         0x00000000,
982         (0x0e00 << 16) | (0x88d0 >> 2),
983         0x00000000,
984         (0x0e00 << 16) | (0x88d4 >> 2),
985         0x00000000,
986         (0x0e00 << 16) | (0x88d8 >> 2),
987         0x00000000,
988         (0x0e00 << 16) | (0x8980 >> 2),
989         0x00000000,
990         (0x0e00 << 16) | (0x30938 >> 2),
991         0x00000000,
992         (0x0e00 << 16) | (0x3093c >> 2),
993         0x00000000,
994         (0x0e00 << 16) | (0x30940 >> 2),
995         0x00000000,
996         (0x0e00 << 16) | (0x89a0 >> 2),
997         0x00000000,
998         (0x0e00 << 16) | (0x30900 >> 2),
999         0x00000000,
1000         (0x0e00 << 16) | (0x30904 >> 2),
1001         0x00000000,
1002         (0x0e00 << 16) | (0x89b4 >> 2),
1003         0x00000000,
1004         (0x0e00 << 16) | (0x3e1fc >> 2),
1005         0x00000000,
1006         (0x0e00 << 16) | (0x3c210 >> 2),
1007         0x00000000,
1008         (0x0e00 << 16) | (0x3c214 >> 2),
1009         0x00000000,
1010         (0x0e00 << 16) | (0x3c218 >> 2),
1011         0x00000000,
1012         (0x0e00 << 16) | (0x8904 >> 2),
1013         0x00000000,
1014         0x5,
1015         (0x0e00 << 16) | (0x8c28 >> 2),
1016         (0x0e00 << 16) | (0x8c2c >> 2),
1017         (0x0e00 << 16) | (0x8c30 >> 2),
1018         (0x0e00 << 16) | (0x8c34 >> 2),
1019         (0x0e00 << 16) | (0x9600 >> 2),
1020 };
1021
/*
 * Bonaire SPM golden register fixups.
 * Flat array of {register offset, mask, value} triples (three u32 per entry,
 * matching the 3-per-row formatting) applied by
 * radeon_program_register_sequence() from cik_init_golden_registers().
 * Hardware-validated magic values from AMD -- do not edit.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1026
/*
 * Bonaire common golden register fixups ({offset, mask, value} triples,
 * applied via radeon_program_register_sequence() in
 * cik_init_golden_registers()).  Hardware-validated values -- do not edit.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1034
/*
 * Bonaire (CIK dGPU) golden register fixups.
 * {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers()
 * (CHIP_BONAIRE case).  Values are AMD-provided magic numbers -- do not edit.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1079
/*
 * Bonaire MGCG/CGCG init table ({offset, mask, value} triples, applied via
 * radeon_program_register_sequence() in cik_init_golden_registers()).
 * NOTE(review): the name suggests medium/coarse-grain clock-gating setup --
 * confirm against the radeon clock-gating code.  Values are AMD-provided
 * magic numbers -- do not edit.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1165
/*
 * Spectre (Kaveri APU) SPM golden register fixups ({offset, mask, value}
 * triples, applied via radeon_program_register_sequence() in
 * cik_init_golden_registers(), CHIP_KAVERI case).  Do not edit values.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1170
/*
 * Spectre (Kaveri APU) common golden register fixups ({offset, mask, value}
 * triples for radeon_program_register_sequence()).  Do not edit values.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1178
/*
 * Spectre (Kaveri APU) golden register fixups.
 * {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers()
 * (CHIP_KAVERI case).  AMD-provided magic numbers -- do not edit.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1207
/*
 * Spectre (Kaveri APU) MGCG/CGCG init table ({offset, mask, value} triples
 * for radeon_program_register_sequence()).
 * NOTE(review): name suggests clock-gating setup -- confirm against the
 * radeon clock-gating code.  AMD-provided magic numbers -- do not edit.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1298
/*
 * Kalindi (Kabini APU; also shared by Mullins) SPM golden register fixups
 * ({offset, mask, value} triples for radeon_program_register_sequence()).
 * Do not edit values.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1303
/*
 * Kalindi (Kabini APU; also shared by Mullins) common golden register
 * fixups ({offset, mask, value} triples for
 * radeon_program_register_sequence()).  Do not edit values.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1311
/*
 * Kalindi (Kabini APU) golden register fixups.
 * {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers()
 * (CHIP_KABINI case).  AMD-provided magic numbers -- do not edit.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1345
/*
 * Kalindi (Kabini APU; also shared by Mullins) MGCG/CGCG init table
 * ({offset, mask, value} triples for radeon_program_register_sequence()).
 * NOTE(review): name suggests clock-gating setup -- confirm against the
 * radeon clock-gating code.  AMD-provided magic numbers -- do not edit.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1404
/*
 * Hawaii SPM golden register fixups ({offset, mask, value} triples for
 * radeon_program_register_sequence(), CHIP_HAWAII case).  Do not edit values.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1409
/*
 * Hawaii common golden register fixups ({offset, mask, value} triples for
 * radeon_program_register_sequence()).  Do not edit values.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1418
/*
 * Hawaii (CIK dGPU) golden register fixups.
 * {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers()
 * (CHIP_HAWAII case).  AMD-provided magic numbers -- do not edit.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1458
/*
 * Hawaii MGCG/CGCG init table ({offset, mask, value} triples for
 * radeon_program_register_sequence()).
 * NOTE(review): name suggests clock-gating setup -- confirm against the
 * radeon clock-gating code.  AMD-provided magic numbers -- do not edit.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1569
/*
 * Godavari (Mullins APU) golden register fixups.
 * {offset, mask, value} triples consumed by
 * radeon_program_register_sequence() from cik_init_golden_registers()
 * (CHIP_MULLINS case).  AMD-provided magic numbers -- do not edit.
 * NOTE(review): the 0x98302 offset below looks suspicious next to the
 * 0x9834 entries used by the other ASIC tables, but it matches upstream
 * Linux, so it is preserved as-is.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1605
1606
1607 static void cik_init_golden_registers(struct radeon_device *rdev)
1608 {
1609         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1610         mutex_lock(&rdev->grbm_idx_mutex);
1611         switch (rdev->family) {
1612         case CHIP_BONAIRE:
1613                 radeon_program_register_sequence(rdev,
1614                                                  bonaire_mgcg_cgcg_init,
1615                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1616                 radeon_program_register_sequence(rdev,
1617                                                  bonaire_golden_registers,
1618                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  bonaire_golden_common_registers,
1621                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1622                 radeon_program_register_sequence(rdev,
1623                                                  bonaire_golden_spm_registers,
1624                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1625                 break;
1626         case CHIP_KABINI:
1627                 radeon_program_register_sequence(rdev,
1628                                                  kalindi_mgcg_cgcg_init,
1629                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1630                 radeon_program_register_sequence(rdev,
1631                                                  kalindi_golden_registers,
1632                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1633                 radeon_program_register_sequence(rdev,
1634                                                  kalindi_golden_common_registers,
1635                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  kalindi_golden_spm_registers,
1638                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1639                 break;
1640         case CHIP_MULLINS:
1641                 radeon_program_register_sequence(rdev,
1642                                                  kalindi_mgcg_cgcg_init,
1643                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1644                 radeon_program_register_sequence(rdev,
1645                                                  godavari_golden_registers,
1646                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1647                 radeon_program_register_sequence(rdev,
1648                                                  kalindi_golden_common_registers,
1649                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_golden_spm_registers,
1652                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1653                 break;
1654         case CHIP_KAVERI:
1655                 radeon_program_register_sequence(rdev,
1656                                                  spectre_mgcg_cgcg_init,
1657                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1658                 radeon_program_register_sequence(rdev,
1659                                                  spectre_golden_registers,
1660                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1661                 radeon_program_register_sequence(rdev,
1662                                                  spectre_golden_common_registers,
1663                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1664                 radeon_program_register_sequence(rdev,
1665                                                  spectre_golden_spm_registers,
1666                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1667                 break;
1668         case CHIP_HAWAII:
1669                 radeon_program_register_sequence(rdev,
1670                                                  hawaii_mgcg_cgcg_init,
1671                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1672                 radeon_program_register_sequence(rdev,
1673                                                  hawaii_golden_registers,
1674                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1675                 radeon_program_register_sequence(rdev,
1676                                                  hawaii_golden_common_registers,
1677                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1678                 radeon_program_register_sequence(rdev,
1679                                                  hawaii_golden_spm_registers,
1680                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1681                 break;
1682         default:
1683                 break;
1684         }
1685         mutex_unlock(&rdev->grbm_idx_mutex);
1686 }
1687
1688 /**
1689  * cik_get_xclk - get the xclk
1690  *
1691  * @rdev: radeon_device pointer
1692  *
1693  * Returns the reference clock used by the gfx engine
1694  * (CIK).
1695  */
1696 u32 cik_get_xclk(struct radeon_device *rdev)
1697 {
1698         u32 reference_clock = rdev->clock.spll.reference_freq;
1699
1700         if (rdev->flags & RADEON_IS_IGP) {
1701                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1702                         return reference_clock / 2;
1703         } else {
1704                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1705                         return reference_clock / 4;
1706         }
1707         return reference_clock;
1708 }
1709
1710 /**
1711  * cik_mm_rdoorbell - read a doorbell dword
1712  *
1713  * @rdev: radeon_device pointer
1714  * @index: doorbell index
1715  *
1716  * Returns the value in the doorbell aperture at the
1717  * requested doorbell index (CIK).
1718  */
1719 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1720 {
1721         if (index < rdev->doorbell.num_doorbells) {
1722                 return readl(rdev->doorbell.ptr + index);
1723         } else {
1724                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1725                 return 0;
1726         }
1727 }
1728
1729 /**
1730  * cik_mm_wdoorbell - write a doorbell dword
1731  *
1732  * @rdev: radeon_device pointer
1733  * @index: doorbell index
1734  * @v: value to write
1735  *
1736  * Writes @v to the doorbell aperture at the
1737  * requested doorbell index (CIK).
1738  */
1739 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1740 {
1741         if (index < rdev->doorbell.num_doorbells) {
1742                 writel(v, rdev->doorbell.ptr + index);
1743         } else {
1744                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1745         }
1746 }
1747
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC_SEQ_IO_DEBUG (index, data) pairs programmed into the memory
 * controller before loading the MC ucode on Bonaire; used only with
 * old-style firmware images (new-style images carry their own table).
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1789
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC_SEQ_IO_DEBUG (index, data) pairs programmed into the memory
 * controller before loading the MC ucode on Hawaii; used only with
 * old-style firmware images (new-style images carry their own table).
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1817
1818
1819 /**
1820  * cik_srbm_select - select specific register instances
1821  *
1822  * @rdev: radeon_device pointer
1823  * @me: selected ME (micro engine)
1824  * @pipe: pipe
1825  * @queue: queue
1826  * @vmid: VMID
1827  *
1828  * Switches the currently active registers instances.  Some
1829  * registers are instanced per VMID, others are instanced per
1830  * me/pipe/queue combination.
1831  */
1832 static void cik_srbm_select(struct radeon_device *rdev,
1833                             u32 me, u32 pipe, u32 queue, u32 vmid)
1834 {
1835         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1836                              MEID(me & 0x3) |
1837                              VMID(vmid & 0xf) |
1838                              QUEUEID(queue & 0x7));
1839         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1840 }
1841
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC (memory controller) ucode into the hw (CIK).
 * Supports both new-style (headered, little-endian) and old-style
 * (raw, big-endian) firmware image layouts.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* old-style image payload */
	const __le32 *new_fw_data = NULL;	/* new-style image payload */
	u32 running, blackout = 0, tmp;
	u32 *io_mc_regs = NULL;			/* old-style: driver-built table */
	const __le32 *new_io_mc_regs = NULL;	/* new-style: table inside image */
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style image: sizes/offsets come from the ucode header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug table is (index, data) dword pairs -> 8 bytes per entry */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* old-style image: register table is compiled into the driver */
		ucode_size = rdev->mc_fw->datasize / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			/* only dGPUs carry MC ucode */
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	/* only (re)load the ucode if the MC sequencer is not running */
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* NOTE(review): dead code — this branch is only reached when
		 * running == 0, so the blackout save below never executes.
		 * Matches upstream Linux; deliberately left untouched. */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* extra MC tuning for PCI device 0x6649; presumably keyed on the
		 * memory configuration reported in MC_SEQ_MISC0 — TODO confirm */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
1955
1956 /**
1957  * cik_init_microcode - load ucode images from disk
1958  *
1959  * @rdev: radeon_device pointer
1960  *
1961  * Use the firmware interface to load the ucode images into
1962  * the driver (not loaded into hw).
1963  * Returns 0 on success, error on failure.
1964  */
1965 static int cik_init_microcode(struct radeon_device *rdev)
1966 {
1967         const char *chip_name;
1968         const char *new_chip_name;
1969         size_t pfp_req_size, me_req_size, ce_req_size,
1970                 mec_req_size, rlc_req_size, mc_req_size = 0,
1971                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1972         char fw_name[30];
1973         int new_fw = 0;
1974         int err;
1975         int num_fw;
1976
1977         DRM_DEBUG("\n");
1978
1979         switch (rdev->family) {
1980         case CHIP_BONAIRE:
1981                 chip_name = "BONAIRE";
1982                 new_chip_name = "bonaire";
1983                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1984                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1985                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1986                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1987                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1988                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1989                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1990                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1991                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1992                 num_fw = 8;
1993                 break;
1994         case CHIP_HAWAII:
1995                 chip_name = "HAWAII";
1996                 new_chip_name = "hawaii";
1997                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1998                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1999                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2000                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2001                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2002                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2003                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2004                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2005                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2006                 num_fw = 8;
2007                 break;
2008         case CHIP_KAVERI:
2009                 chip_name = "KAVERI";
2010                 new_chip_name = "kaveri";
2011                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2012                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2013                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2014                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2015                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2016                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2017                 num_fw = 7;
2018                 break;
2019         case CHIP_KABINI:
2020                 chip_name = "KABINI";
2021                 new_chip_name = "kabini";
2022                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2023                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2024                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2025                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2026                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2027                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2028                 num_fw = 6;
2029                 break;
2030         case CHIP_MULLINS:
2031                 chip_name = "MULLINS";
2032                 new_chip_name = "mullins";
2033                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2034                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2035                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2036                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2037                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2038                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2039                 num_fw = 6;
2040                 break;
2041         default: BUG();
2042         }
2043
2044         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2045
2046         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2047         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2048         if (err) {
2049                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2050                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2051                 if (err)
2052                         goto out;
2053                 if (rdev->pfp_fw->datasize != pfp_req_size) {
2054                         printk(KERN_ERR
2055                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2056                                rdev->pfp_fw->datasize, fw_name);
2057                         err = -EINVAL;
2058                         goto out;
2059                 }
2060         } else {
2061                 err = radeon_ucode_validate(rdev->pfp_fw);
2062                 if (err) {
2063                         printk(KERN_ERR
2064                                "cik_fw: validation failed for firmware \"%s\"\n",
2065                                fw_name);
2066                         goto out;
2067                 } else {
2068                         new_fw++;
2069                 }
2070         }
2071
2072         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2073         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2074         if (err) {
2075                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2076                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2077                 if (err)
2078                         goto out;
2079                 if (rdev->me_fw->datasize != me_req_size) {
2080                         printk(KERN_ERR
2081                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2082                                rdev->me_fw->datasize, fw_name);
2083                         err = -EINVAL;
2084                 }
2085         } else {
2086                 err = radeon_ucode_validate(rdev->me_fw);
2087                 if (err) {
2088                         printk(KERN_ERR
2089                                "cik_fw: validation failed for firmware \"%s\"\n",
2090                                fw_name);
2091                         goto out;
2092                 } else {
2093                         new_fw++;
2094                 }
2095         }
2096
2097         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2098         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2099         if (err) {
2100                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2101                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2102                 if (err)
2103                         goto out;
2104                 if (rdev->ce_fw->datasize != ce_req_size) {
2105                         printk(KERN_ERR
2106                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2107                                rdev->ce_fw->datasize, fw_name);
2108                         err = -EINVAL;
2109                 }
2110         } else {
2111                 err = radeon_ucode_validate(rdev->ce_fw);
2112                 if (err) {
2113                         printk(KERN_ERR
2114                                "cik_fw: validation failed for firmware \"%s\"\n",
2115                                fw_name);
2116                         goto out;
2117                 } else {
2118                         new_fw++;
2119                 }
2120         }
2121
2122         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2123         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2124         if (err) {
2125                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2126                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2127                 if (err)
2128                         goto out;
2129                 if (rdev->mec_fw->datasize != mec_req_size) {
2130                         printk(KERN_ERR
2131                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2132                                rdev->mec_fw->datasize, fw_name);
2133                         err = -EINVAL;
2134                 }
2135         } else {
2136                 err = radeon_ucode_validate(rdev->mec_fw);
2137                 if (err) {
2138                         printk(KERN_ERR
2139                                "cik_fw: validation failed for firmware \"%s\"\n",
2140                                fw_name);
2141                         goto out;
2142                 } else {
2143                         new_fw++;
2144                 }
2145         }
2146
2147         if (rdev->family == CHIP_KAVERI) {
2148                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2149                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150                 if (err) {
2151                         goto out;
2152                 } else {
2153                         err = radeon_ucode_validate(rdev->mec2_fw);
2154                         if (err) {
2155                                 goto out;
2156                         } else {
2157                                 new_fw++;
2158                         }
2159                 }
2160         }
2161
2162         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2163         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164         if (err) {
2165                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2166                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167                 if (err)
2168                         goto out;
2169                 if (rdev->rlc_fw->datasize != rlc_req_size) {
2170                         printk(KERN_ERR
2171                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2172                                rdev->rlc_fw->datasize, fw_name);
2173                         err = -EINVAL;
2174                 }
2175         } else {
2176                 err = radeon_ucode_validate(rdev->rlc_fw);
2177                 if (err) {
2178                         printk(KERN_ERR
2179                                "cik_fw: validation failed for firmware \"%s\"\n",
2180                                fw_name);
2181                         goto out;
2182                 } else {
2183                         new_fw++;
2184                 }
2185         }
2186
2187         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2188         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2189         if (err) {
2190                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2191                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2192                 if (err)
2193                         goto out;
2194                 if (rdev->sdma_fw->datasize != sdma_req_size) {
2195                         printk(KERN_ERR
2196                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2197                                rdev->sdma_fw->datasize, fw_name);
2198                         err = -EINVAL;
2199                 }
2200         } else {
2201                 err = radeon_ucode_validate(rdev->sdma_fw);
2202                 if (err) {
2203                         printk(KERN_ERR
2204                                "cik_fw: validation failed for firmware \"%s\"\n",
2205                                fw_name);
2206                         goto out;
2207                 } else {
2208                         new_fw++;
2209                 }
2210         }
2211
2212         /* No SMC, MC ucode on APUs */
2213         if (!(rdev->flags & RADEON_IS_IGP)) {
2214                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2215                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2216                 if (err) {
2217                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2218                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2219                         if (err) {
2220                                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2221                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2222                                 if (err)
2223                                         goto out;
2224                         }
2225                         if ((rdev->mc_fw->datasize != mc_req_size) &&
2226                             (rdev->mc_fw->datasize != mc2_req_size)){
2227                                 printk(KERN_ERR
2228                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2229                                        rdev->mc_fw->datasize, fw_name);
2230                                 err = -EINVAL;
2231                         }
2232                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2233                 } else {
2234                         err = radeon_ucode_validate(rdev->mc_fw);
2235                         if (err) {
2236                                 printk(KERN_ERR
2237                                        "cik_fw: validation failed for firmware \"%s\"\n",
2238                                        fw_name);
2239                                 goto out;
2240                         } else {
2241                                 new_fw++;
2242                         }
2243                 }
2244
2245                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2246                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247                 if (err) {
2248                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2249                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2250                         if (err) {
2251                                 printk(KERN_ERR
2252                                        "smc: error loading firmware \"%s\"\n",
2253                                        fw_name);
2254                                 release_firmware(rdev->smc_fw);
2255                                 rdev->smc_fw = NULL;
2256                                 err = 0;
2257                         } else if (rdev->smc_fw->datasize != smc_req_size) {
2258                                 printk(KERN_ERR
2259                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2260                                        rdev->smc_fw->datasize, fw_name);
2261                                 err = -EINVAL;
2262                         }
2263                 } else {
2264                         err = radeon_ucode_validate(rdev->smc_fw);
2265                         if (err) {
2266                                 printk(KERN_ERR
2267                                        "cik_fw: validation failed for firmware \"%s\"\n",
2268                                        fw_name);
2269                                 goto out;
2270                         } else {
2271                                 new_fw++;
2272                         }
2273                 }
2274         }
2275
2276         if (new_fw == 0) {
2277                 rdev->new_fw = false;
2278         } else if (new_fw < num_fw) {
2279                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2280                 err = -EINVAL;
2281         } else {
2282                 rdev->new_fw = true;
2283         }
2284
2285 out:
2286         if (err) {
2287                 if (err != -EINVAL)
2288                         printk(KERN_ERR
2289                                "cik_cp: Failed to load firmware \"%s\"\n",
2290                                fw_name);
2291                 release_firmware(rdev->pfp_fw);
2292                 rdev->pfp_fw = NULL;
2293                 release_firmware(rdev->me_fw);
2294                 rdev->me_fw = NULL;
2295                 release_firmware(rdev->ce_fw);
2296                 rdev->ce_fw = NULL;
2297                 release_firmware(rdev->mec_fw);
2298                 rdev->mec_fw = NULL;
2299                 release_firmware(rdev->mec2_fw);
2300                 rdev->mec2_fw = NULL;
2301                 release_firmware(rdev->rlc_fw);
2302                 rdev->rlc_fw = NULL;
2303                 release_firmware(rdev->sdma_fw);
2304                 rdev->sdma_fw = NULL;
2305                 release_firmware(rdev->mc_fw);
2306                 rdev->mc_fw = NULL;
2307                 release_firmware(rdev->smc_fw);
2308                 rdev->smc_fw = NULL;
2309         }
2310         return err;
2311 }
2312
2313 /**
2314  * cik_fini_microcode - drop the firmwares image references
2315  *
2316  * @rdev: radeon_device pointer
2317  *
2318  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2319  * Called at driver shutdown.
2320  */
2321 static void cik_fini_microcode(struct radeon_device *rdev)
2322 {
2323         release_firmware(rdev->pfp_fw);
2324         rdev->pfp_fw = NULL;
2325         release_firmware(rdev->me_fw);
2326         rdev->me_fw = NULL;
2327         release_firmware(rdev->ce_fw);
2328         rdev->ce_fw = NULL;
2329         release_firmware(rdev->mec_fw);
2330         rdev->mec_fw = NULL;
2331         release_firmware(rdev->mec2_fw);
2332         rdev->mec2_fw = NULL;
2333         release_firmware(rdev->rlc_fw);
2334         rdev->rlc_fw = NULL;
2335         release_firmware(rdev->sdma_fw);
2336         rdev->sdma_fw = NULL;
2337         release_firmware(rdev->mc_fw);
2338         rdev->mc_fw = NULL;
2339         release_firmware(rdev->smc_fw);
2340         rdev->smc_fw = NULL;
2341 }
2342
2343 /*
2344  * Core functions
2345  */
2346 /**
2347  * cik_tiling_mode_table_init - init the hw tiling table
2348  *
2349  * @rdev: radeon_device pointer
2350  *
2351  * Starting with SI, the tiling setup is done globally in a
2352  * set of 32 tiling modes.  Rather than selecting each set of
2353  * parameters per surface as on older asics, we just select
2354  * which index in the tiling table we want to use, and the
2355  * surface uses those parameters (CIK).
2356  */
2357 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2358 {
2359         u32 *tile = rdev->config.cik.tile_mode_array;
2360         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2361         const u32 num_tile_mode_states =
2362                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2363         const u32 num_secondary_tile_mode_states =
2364                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2365         u32 reg_offset, split_equal_to_row_size;
2366         u32 num_pipe_configs;
2367         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2368                 rdev->config.cik.max_shader_engines;
2369
2370         switch (rdev->config.cik.mem_row_size_in_kb) {
2371         case 1:
2372                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2373                 break;
2374         case 2:
2375         default:
2376                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2377                 break;
2378         case 4:
2379                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2380                 break;
2381         }
2382
2383         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2384         if (num_pipe_configs > 8)
2385                 num_pipe_configs = 16;
2386
2387         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2388                 tile[reg_offset] = 0;
2389         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390                 macrotile[reg_offset] = 0;
2391
2392         switch(num_pipe_configs) {
2393         case 16:
2394                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2396                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2398                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2402                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2404                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2406                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2408                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2410                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2412                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                            TILE_SPLIT(split_equal_to_row_size));
2414                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2419                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2421                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2423                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                            TILE_SPLIT(split_equal_to_row_size));
2425                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2427                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2430                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2439                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2445                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2450                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2452                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2454                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2460                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2462                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2464                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2465                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2466                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2467                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2469                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2470                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472
2473                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                            NUM_BANKS(ADDR_SURF_16_BANK));
2477                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK));
2481                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                            NUM_BANKS(ADDR_SURF_16_BANK));
2485                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                            NUM_BANKS(ADDR_SURF_16_BANK));
2489                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                            NUM_BANKS(ADDR_SURF_8_BANK));
2493                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                            NUM_BANKS(ADDR_SURF_4_BANK));
2497                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                            NUM_BANKS(ADDR_SURF_2_BANK));
2501                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504                            NUM_BANKS(ADDR_SURF_16_BANK));
2505                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2507                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508                            NUM_BANKS(ADDR_SURF_16_BANK));
2509                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512                             NUM_BANKS(ADDR_SURF_16_BANK));
2513                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2516                             NUM_BANKS(ADDR_SURF_8_BANK));
2517                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520                             NUM_BANKS(ADDR_SURF_4_BANK));
2521                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                             NUM_BANKS(ADDR_SURF_2_BANK));
2525                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528                             NUM_BANKS(ADDR_SURF_2_BANK));
2529
2530                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2531                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2532                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2533                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2534                 break;
2535
2536         case 8:
2537                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2545                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2549                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2553                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                            TILE_SPLIT(split_equal_to_row_size));
2557                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2560                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                            TILE_SPLIT(split_equal_to_row_size));
2568                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2569                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2570                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2573                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2582                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2588                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2592                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2593                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2597                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2603                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2610                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2612                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2615
2616                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619                                 NUM_BANKS(ADDR_SURF_16_BANK));
2620                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635                                 NUM_BANKS(ADDR_SURF_8_BANK));
2636                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                 NUM_BANKS(ADDR_SURF_4_BANK));
2640                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                 NUM_BANKS(ADDR_SURF_2_BANK));
2644                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2646                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647                                 NUM_BANKS(ADDR_SURF_16_BANK));
2648                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2651                                 NUM_BANKS(ADDR_SURF_16_BANK));
2652                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2655                                 NUM_BANKS(ADDR_SURF_16_BANK));
2656                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2659                                 NUM_BANKS(ADDR_SURF_16_BANK));
2660                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663                                 NUM_BANKS(ADDR_SURF_8_BANK));
2664                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2667                                 NUM_BANKS(ADDR_SURF_4_BANK));
2668                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2670                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2671                                 NUM_BANKS(ADDR_SURF_2_BANK));
2672
2673                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2674                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2675                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2676                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2677                 break;
2678
2679         case 4:
2680                 if (num_rbs == 4) {
2681                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2683                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2685                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2687                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2689                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2693                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2695                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2697                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2699                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                            TILE_SPLIT(split_equal_to_row_size));
2701                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2705                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2706                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2708                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2709                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2710                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                            TILE_SPLIT(split_equal_to_row_size));
2712                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2713                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2714                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2717                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2726                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2727                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2730                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2732                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2741                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2747                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759
2760                 } else if (num_rbs < 4) {
2761                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2767                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2769                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2773                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2777                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2778                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                            TILE_SPLIT(split_equal_to_row_size));
2781                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2789                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2790                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                            TILE_SPLIT(split_equal_to_row_size));
2792                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2794                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2795                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2797                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2801                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2805                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2806                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2812                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2821                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2825                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2827                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2828                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2829                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2832                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2833                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2834                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2836                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2837                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839                 }
2840
2841                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                 NUM_BANKS(ADDR_SURF_8_BANK));
2865                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868                                 NUM_BANKS(ADDR_SURF_4_BANK));
2869                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2870                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2871                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872                                 NUM_BANKS(ADDR_SURF_16_BANK));
2873                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2874                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876                                 NUM_BANKS(ADDR_SURF_16_BANK));
2877                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2879                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2880                                 NUM_BANKS(ADDR_SURF_16_BANK));
2881                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2883                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2884                                 NUM_BANKS(ADDR_SURF_16_BANK));
2885                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2887                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2888                                 NUM_BANKS(ADDR_SURF_16_BANK));
2889                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2892                                 NUM_BANKS(ADDR_SURF_8_BANK));
2893                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896                                 NUM_BANKS(ADDR_SURF_4_BANK));
2897
2898                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2900                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2901                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2902                 break;
2903
2904         case 2:
2905                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907                            PIPE_CONFIG(ADDR_SURF_P2) |
2908                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2909                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911                            PIPE_CONFIG(ADDR_SURF_P2) |
2912                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2913                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915                            PIPE_CONFIG(ADDR_SURF_P2) |
2916                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2917                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919                            PIPE_CONFIG(ADDR_SURF_P2) |
2920                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2921                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2923                            PIPE_CONFIG(ADDR_SURF_P2) |
2924                            TILE_SPLIT(split_equal_to_row_size));
2925                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926                            PIPE_CONFIG(ADDR_SURF_P2) |
2927                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930                            PIPE_CONFIG(ADDR_SURF_P2) |
2931                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934                            PIPE_CONFIG(ADDR_SURF_P2) |
2935                            TILE_SPLIT(split_equal_to_row_size));
2936                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2937                            PIPE_CONFIG(ADDR_SURF_P2);
2938                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2939                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940                            PIPE_CONFIG(ADDR_SURF_P2));
2941                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943                             PIPE_CONFIG(ADDR_SURF_P2) |
2944                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947                             PIPE_CONFIG(ADDR_SURF_P2) |
2948                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2950                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951                             PIPE_CONFIG(ADDR_SURF_P2) |
2952                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954                             PIPE_CONFIG(ADDR_SURF_P2) |
2955                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958                             PIPE_CONFIG(ADDR_SURF_P2) |
2959                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962                             PIPE_CONFIG(ADDR_SURF_P2) |
2963                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2965                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966                             PIPE_CONFIG(ADDR_SURF_P2) |
2967                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2969                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2970                             PIPE_CONFIG(ADDR_SURF_P2));
2971                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2972                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2973                             PIPE_CONFIG(ADDR_SURF_P2) |
2974                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2976                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977                             PIPE_CONFIG(ADDR_SURF_P2) |
2978                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2979                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981                             PIPE_CONFIG(ADDR_SURF_P2) |
2982                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983
2984                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987                                 NUM_BANKS(ADDR_SURF_16_BANK));
2988                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011                                 NUM_BANKS(ADDR_SURF_8_BANK));
3012                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015                                 NUM_BANKS(ADDR_SURF_16_BANK));
3016                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3017                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3018                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019                                 NUM_BANKS(ADDR_SURF_16_BANK));
3020                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3035                                 NUM_BANKS(ADDR_SURF_16_BANK));
3036                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039                                 NUM_BANKS(ADDR_SURF_8_BANK));
3040
3041                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3042                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3043                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3044                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3045                 break;
3046
3047         default:
3048                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3049         }
3050 }
3051
3052 /**
3053  * cik_select_se_sh - select which SE, SH to address
3054  *
3055  * @rdev: radeon_device pointer
3056  * @se_num: shader engine to address
3057  * @sh_num: sh block to address
3058  *
3059  * Select which SE, SH combinations to address. Certain
3060  * registers are instanced per SE or SH.  0xffffffff means
3061  * broadcast to all SEs or SHs (CIK).
3062  */
3063 static void cik_select_se_sh(struct radeon_device *rdev,
3064                              u32 se_num, u32 sh_num)
3065 {
3066         u32 data = INSTANCE_BROADCAST_WRITES;
3067
3068         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3069                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3070         else if (se_num == 0xffffffff)
3071                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3072         else if (sh_num == 0xffffffff)
3073                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3074         else
3075                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3076         WREG32(GRBM_GFX_INDEX, data);
3077 }
3078
3079 /**
3080  * cik_create_bitmask - create a bitmask
3081  *
3082  * @bit_width: length of the mask
3083  *
3084  * create a variable length bit mask (CIK).
3085  * Returns the bitmask.
3086  */
3087 static u32 cik_create_bitmask(u32 bit_width)
3088 {
3089         u32 i, mask = 0;
3090
3091         for (i = 0; i < bit_width; i++) {
3092                 mask <<= 1;
3093                 mask |= 1;
3094         }
3095         return mask;
3096 }
3097
3098 /**
3099  * cik_get_rb_disabled - computes the mask of disabled RBs
3100  *
3101  * @rdev: radeon_device pointer
3102  * @max_rb_num: max RBs (render backends) for the asic
3103  * @se_num: number of SEs (shader engines) for the asic
3104  * @sh_per_se: number of SH blocks per SE for the asic
3105  *
3106  * Calculates the bitmask of disabled RBs (CIK).
3107  * Returns the disabled RB bitmask.
3108  */
3109 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3110                               u32 max_rb_num_per_se,
3111                               u32 sh_per_se)
3112 {
3113         u32 data, mask;
3114
3115         data = RREG32(CC_RB_BACKEND_DISABLE);
3116         if (data & 1)
3117                 data &= BACKEND_DISABLE_MASK;
3118         else
3119                 data = 0;
3120         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3121
3122         data >>= BACKEND_DISABLE_SHIFT;
3123
3124         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3125
3126         return data & mask;
3127 }
3128
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK): scans which RBs the hw
 * has disabled, caches the resulting enable mask, and programs
 * PA_SC_RASTER_CONFIG for each SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH pair and pack each pair's disabled-RB bits into
	 * one bitmap.  GRBM_GFX_INDEX is shared state, so the whole scan
	 * runs under grbm_idx_mutex.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii uses a different per-SH stride in the packed bitmap */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing before releasing the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* invert the disabled bitmap into an enabled-RB bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* cache the enable mask in the config struct for later consumers */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE, consuming two enabled_rbs
	 * bits (one RB pair) per SH iteration.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* neither RB of the pair is enabled */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of the pair are enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the hw in broadcast mode, as other code expects */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3201
3202 /**
3203  * cik_gpu_init - setup the 3D engine
3204  *
3205  * @rdev: radeon_device pointer
3206  *
3207  * Configures the 3D engine and tiling configuration
3208  * registers so that the 3D engine is usable.
3209  */
3210 static void cik_gpu_init(struct radeon_device *rdev)
3211 {
3212         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3213         u32 mc_shared_chmap, mc_arb_ramcfg;
3214         u32 hdp_host_path_cntl;
3215         u32 tmp;
3216         int i, j;
3217
3218         switch (rdev->family) {
3219         case CHIP_BONAIRE:
3220                 rdev->config.cik.max_shader_engines = 2;
3221                 rdev->config.cik.max_tile_pipes = 4;
3222                 rdev->config.cik.max_cu_per_sh = 7;
3223                 rdev->config.cik.max_sh_per_se = 1;
3224                 rdev->config.cik.max_backends_per_se = 2;
3225                 rdev->config.cik.max_texture_channel_caches = 4;
3226                 rdev->config.cik.max_gprs = 256;
3227                 rdev->config.cik.max_gs_threads = 32;
3228                 rdev->config.cik.max_hw_contexts = 8;
3229
3230                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3231                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3232                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3233                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3234                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3235                 break;
3236         case CHIP_HAWAII:
3237                 rdev->config.cik.max_shader_engines = 4;
3238                 rdev->config.cik.max_tile_pipes = 16;
3239                 rdev->config.cik.max_cu_per_sh = 11;
3240                 rdev->config.cik.max_sh_per_se = 1;
3241                 rdev->config.cik.max_backends_per_se = 4;
3242                 rdev->config.cik.max_texture_channel_caches = 16;
3243                 rdev->config.cik.max_gprs = 256;
3244                 rdev->config.cik.max_gs_threads = 32;
3245                 rdev->config.cik.max_hw_contexts = 8;
3246
3247                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3248                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3249                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3250                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3251                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3252                 break;
3253         case CHIP_KAVERI:
3254                 rdev->config.cik.max_shader_engines = 1;
3255                 rdev->config.cik.max_tile_pipes = 4;
3256                 if ((rdev->pdev->device == 0x1304) ||
3257                     (rdev->pdev->device == 0x1305) ||
3258                     (rdev->pdev->device == 0x130C) ||
3259                     (rdev->pdev->device == 0x130F) ||
3260                     (rdev->pdev->device == 0x1310) ||
3261                     (rdev->pdev->device == 0x1311) ||
3262                     (rdev->pdev->device == 0x131C)) {
3263                         rdev->config.cik.max_cu_per_sh = 8;
3264                         rdev->config.cik.max_backends_per_se = 2;
3265                 } else if ((rdev->pdev->device == 0x1309) ||
3266                            (rdev->pdev->device == 0x130A) ||
3267                            (rdev->pdev->device == 0x130D) ||
3268                            (rdev->pdev->device == 0x1313) ||
3269                            (rdev->pdev->device == 0x131D)) {
3270                         rdev->config.cik.max_cu_per_sh = 6;
3271                         rdev->config.cik.max_backends_per_se = 2;
3272                 } else if ((rdev->pdev->device == 0x1306) ||
3273                            (rdev->pdev->device == 0x1307) ||
3274                            (rdev->pdev->device == 0x130B) ||
3275                            (rdev->pdev->device == 0x130E) ||
3276                            (rdev->pdev->device == 0x1315) ||
3277                            (rdev->pdev->device == 0x1318) ||
3278                            (rdev->pdev->device == 0x131B)) {
3279                         rdev->config.cik.max_cu_per_sh = 4;
3280                         rdev->config.cik.max_backends_per_se = 1;
3281                 } else {
3282                         rdev->config.cik.max_cu_per_sh = 3;
3283                         rdev->config.cik.max_backends_per_se = 1;
3284                 }
3285                 rdev->config.cik.max_sh_per_se = 1;
3286                 rdev->config.cik.max_texture_channel_caches = 4;
3287                 rdev->config.cik.max_gprs = 256;
3288                 rdev->config.cik.max_gs_threads = 16;
3289                 rdev->config.cik.max_hw_contexts = 8;
3290
3291                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3292                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3293                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3294                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3295                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3296                 break;
3297         case CHIP_KABINI:
3298         case CHIP_MULLINS:
3299         default:
3300                 rdev->config.cik.max_shader_engines = 1;
3301                 rdev->config.cik.max_tile_pipes = 2;
3302                 rdev->config.cik.max_cu_per_sh = 2;
3303                 rdev->config.cik.max_sh_per_se = 1;
3304                 rdev->config.cik.max_backends_per_se = 1;
3305                 rdev->config.cik.max_texture_channel_caches = 2;
3306                 rdev->config.cik.max_gprs = 256;
3307                 rdev->config.cik.max_gs_threads = 16;
3308                 rdev->config.cik.max_hw_contexts = 8;
3309
3310                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3311                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3312                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3313                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3314                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3315                 break;
3316         }
3317
3318         /* Initialize HDP */
3319         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3320                 WREG32((0x2c14 + j), 0x00000000);
3321                 WREG32((0x2c18 + j), 0x00000000);
3322                 WREG32((0x2c1c + j), 0x00000000);
3323                 WREG32((0x2c20 + j), 0x00000000);
3324                 WREG32((0x2c24 + j), 0x00000000);
3325         }
3326
3327         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3328         WREG32(SRBM_INT_CNTL, 0x1);
3329         WREG32(SRBM_INT_ACK, 0x1);
3330
3331         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3332
3333         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3334         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3335
3336         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3337         rdev->config.cik.mem_max_burst_length_bytes = 256;
3338         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3339         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3340         if (rdev->config.cik.mem_row_size_in_kb > 4)
3341                 rdev->config.cik.mem_row_size_in_kb = 4;
3342         /* XXX use MC settings? */
3343         rdev->config.cik.shader_engine_tile_size = 32;
3344         rdev->config.cik.num_gpus = 1;
3345         rdev->config.cik.multi_gpu_tile_size = 64;
3346
3347         /* fix up row size */
3348         gb_addr_config &= ~ROW_SIZE_MASK;
3349         switch (rdev->config.cik.mem_row_size_in_kb) {
3350         case 1:
3351         default:
3352                 gb_addr_config |= ROW_SIZE(0);
3353                 break;
3354         case 2:
3355                 gb_addr_config |= ROW_SIZE(1);
3356                 break;
3357         case 4:
3358                 gb_addr_config |= ROW_SIZE(2);
3359                 break;
3360         }
3361
3362         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3363          * not have bank info, so create a custom tiling dword.
3364          * bits 3:0   num_pipes
3365          * bits 7:4   num_banks
3366          * bits 11:8  group_size
3367          * bits 15:12 row_size
3368          */
3369         rdev->config.cik.tile_config = 0;
3370         switch (rdev->config.cik.num_tile_pipes) {
3371         case 1:
3372                 rdev->config.cik.tile_config |= (0 << 0);
3373                 break;
3374         case 2:
3375                 rdev->config.cik.tile_config |= (1 << 0);
3376                 break;
3377         case 4:
3378                 rdev->config.cik.tile_config |= (2 << 0);
3379                 break;
3380         case 8:
3381         default:
3382                 /* XXX what about 12? */
3383                 rdev->config.cik.tile_config |= (3 << 0);
3384                 break;
3385         }
3386         rdev->config.cik.tile_config |=
3387                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3388         rdev->config.cik.tile_config |=
3389                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3390         rdev->config.cik.tile_config |=
3391                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3392
3393         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3394         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3395         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3396         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3397         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3398         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3399         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3400         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3401
3402         cik_tiling_mode_table_init(rdev);
3403
3404         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3405                      rdev->config.cik.max_sh_per_se,
3406                      rdev->config.cik.max_backends_per_se);
3407
3408         rdev->config.cik.active_cus = 0;
3409         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3410                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3411                         rdev->config.cik.active_cus +=
3412                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3413                 }
3414         }
3415
3416         /* set HW defaults for 3D engine */
3417         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3418
3419         mutex_lock(&rdev->grbm_idx_mutex);
3420         /*
3421          * making sure that the following register writes will be broadcasted
3422          * to all the shaders
3423          */
3424         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3425         WREG32(SX_DEBUG_1, 0x20);
3426
3427         WREG32(TA_CNTL_AUX, 0x00010000);
3428
3429         tmp = RREG32(SPI_CONFIG_CNTL);
3430         tmp |= 0x03000000;
3431         WREG32(SPI_CONFIG_CNTL, tmp);
3432
3433         WREG32(SQ_CONFIG, 1);
3434
3435         WREG32(DB_DEBUG, 0);
3436
3437         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3438         tmp |= 0x00000400;
3439         WREG32(DB_DEBUG2, tmp);
3440
3441         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3442         tmp |= 0x00020200;
3443         WREG32(DB_DEBUG3, tmp);
3444
3445         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3446         tmp |= 0x00018208;
3447         WREG32(CB_HW_CONTROL, tmp);
3448
3449         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3450
3451         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3452                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3453                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3454                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3455
3456         WREG32(VGT_NUM_INSTANCES, 1);
3457
3458         WREG32(CP_PERFMON_CNTL, 0);
3459
3460         WREG32(SQ_CONFIG, 0);
3461
3462         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3463                                           FORCE_EOV_MAX_REZ_CNT(255)));
3464
3465         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3466                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3467
3468         WREG32(VGT_GS_VERTEX_REUSE, 16);
3469         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3470
3471         tmp = RREG32(HDP_MISC_CNTL);
3472         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3473         WREG32(HDP_MISC_CNTL, tmp);
3474
3475         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3476         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3477
3478         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3479         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3480         mutex_unlock(&rdev->grbm_idx_mutex);
3481
3482         udelay(50);
3483 }
3484
3485 /*
3486  * GPU scratch registers helpers function.
3487  */
3488 /**
3489  * cik_scratch_init - setup driver info for CP scratch regs
3490  *
3491  * @rdev: radeon_device pointer
3492  *
3493  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3495  * is not used by default on newer asics (r6xx+).  On newer asics,
3496  * memory buffers are used for fences rather than scratch regs.
3497  */
3498 static void cik_scratch_init(struct radeon_device *rdev)
3499 {
3500         int i;
3501
3502         rdev->scratch.num_reg = 7;
3503         rdev->scratch.reg_base = SCRATCH_REG0;
3504         for (i = 0; i < rdev->scratch.num_reg; i++) {
3505                 rdev->scratch.free[i] = true;
3506                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3507         }
3508 }
3509
3510 /**
3511  * cik_ring_test - basic gfx ring test
3512  *
3513  * @rdev: radeon_device pointer
3514  * @ring: radeon_ring structure holding ring information
3515  *
3516  * Allocate a scratch register and write to it using the gfx ring (CIK).
3517  * Provides a basic gfx ring test to verify that the ring is working.
3518  * Used by cik_cp_gfx_resume();
3519  * Returns 0 on success, error on failure.
3520  */
3521 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3522 {
3523         uint32_t scratch;
3524         uint32_t tmp = 0;
3525         unsigned i;
3526         int r;
3527
3528         r = radeon_scratch_get(rdev, &scratch);
3529         if (r) {
3530                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3531                 return r;
3532         }
3533         WREG32(scratch, 0xCAFEDEAD);
3534         r = radeon_ring_lock(rdev, ring, 3);
3535         if (r) {
3536                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3537                 radeon_scratch_free(rdev, scratch);
3538                 return r;
3539         }
3540         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3541         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3542         radeon_ring_write(ring, 0xDEADBEEF);
3543         radeon_ring_unlock_commit(rdev, ring, false);
3544
3545         for (i = 0; i < rdev->usec_timeout; i++) {
3546                 tmp = RREG32(scratch);
3547                 if (tmp == 0xDEADBEEF)
3548                         break;
3549                 DRM_UDELAY(1);
3550         }
3551         if (i < rdev->usec_timeout) {
3552                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3553         } else {
3554                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3555                           ring->idx, scratch, tmp);
3556                 r = -EINVAL;
3557         }
3558         radeon_scratch_free(rdev, scratch);
3559         return r;
3560 }
3561
3562 /**
3563  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3564  *
3565  * @rdev: radeon_device pointer
3566  * @ridx: radeon ring index
3567  *
3568  * Emits an hdp flush on the cp.
3569  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_* bit for this ring: CP0 for the gfx ring,
	 * CP2/CP6 shifted by the pipe number for the two compute MEs.
	 * Note the default label is deliberately grouped with the compute
	 * cases so unknown ring indices take the compute path.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unexpected ME number: emit nothing */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write-wait-write mode: write GPU_HDP_FLUSH_REQ,
	 * then poll GPU_HDP_FLUSH_DONE until the selected bit matches.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask); /* reference value */
	radeon_ring_write(ring, ref_and_mask); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3606
3607 /**
3608  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3609  *
3610  * @rdev: radeon_device pointer
3611  * @fence: radeon fence object
3612  *
 * Emits a fence sequence number on the gfx ring and flushes
3614  * GPU caches.
3615  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy event: same DATA_SEL(1) as the real one below, but
	 * INT_SEL(0) so only the real event raises an interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3647
3648 /**
3649  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3650  *
3651  * @rdev: radeon_device pointer
3652  * @fence: radeon fence object
3653  *
 * Emits a fence sequence number on the compute ring and flushes
3655  * GPU caches.
3656  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* fence sequence value lands at this GPU address */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* NOTE: unlike the gfx EOP packet, RELEASE_MEM carries the
	 * DATA_SEL/INT_SEL dword before the 64-bit address */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3675
3676 /**
3677  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3678  *
3679  * @rdev: radeon_device pointer
3680  * @ring: radeon ring buffer object
3681  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3683  *
3684  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3685  * from running ahead of semaphore waits.
3686  */
3687 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3688                              struct radeon_ring *ring,
3689                              struct radeon_semaphore *semaphore,
3690                              bool emit_wait)
3691 {
3692         uint64_t addr = semaphore->gpu_addr;
3693         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3694
3695         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3696         radeon_ring_write(ring, lower_32_bits(addr));
3697         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3698
3699         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3700                 /* Prevent the PFP from running ahead of the semaphore wait */
3701                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3702                 radeon_ring_write(ring, 0x0);
3703         }
3704
3705         return true;
3706 }
3707
3708 /**
3709  * cik_copy_cpdma - copy pages using the CP DMA engine
3710  *
3711  * @rdev: radeon_device pointer
3712  * @src_offset: src GPU address
3713  * @dst_offset: dst GPU address
3714  * @num_gpu_pages: number of GPU pages to xfer
3715  * @fence: radeon fence object
3716  *
3717  * Copy GPU paging using the CP DMA engine (CIK+).
3718  * Used by the radeon ttm implementation to move pages if
3719  * registered as the asic copy callback.
3720  */
int cik_copy_cpdma(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	/* each DMA_DATA packet can move at most 0x1fffff bytes, so split
	 * the copy into that many chunks; 7 dwords per chunk plus 18
	 * dwords of overhead (sync + fence) */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	/* order this copy after the fence the caller passed in */
	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the last chunk needs CP_SYNC */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	/* emit the fence the caller will wait on; undo everything on error */
	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
3782
3783 /*
3784  * IB stuff
3785  */
3786 /**
3787  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3788  *
3789  * @rdev: radeon_device pointer
3790  * @ib: radeon indirect buffer object
3791  *
3792  * Emits a DE (drawing engine) or CE (constant engine) IB
3793  * on the gfx ring.  IBs are usually generated by userspace
3794  * acceleration drivers and submitted to the kernel for
3795  * scheduling on the ring.  This function schedules the IB
3796  * on the gfx ring for execution by the GPU.
3797  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 dwords for the IB packet at the end */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet plus 4 for the IB */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, plus the VM id (bits 24+) when the IB
	 * runs in a non-default address space */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3837
3838 /**
3839  * cik_ib_test - basic gfx ring IB test
3840  *
3841  * @rdev: radeon_device pointer
3842  * @ring: radeon_ring structure holding ring information
3843  *
3844  * Allocate an IB and execute it on the gfx ring (CIK).
3845  * Provides a basic gfx ring test to verify that IBs are working.
3846  * Returns 0 on success, error on failure.
3847  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* allocate a scratch register and seed it with a sentinel */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* the IB just overwrites the scratch register with 0xDEADBEEF */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		/* NOTE(review): the early-exit on timeout is disabled here
		 * (local deviation from Linux), so the code falls through
		 * and still polls the scratch register below — presumably
		 * deliberate; confirm before re-enabling. */
#if 0
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return -ETIMEDOUT;
#endif
	}
	r = 0;
	/* poll until the IB's write lands or the timeout expires */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3912
3913 /*
3914  * CP.
 * On CIK, gfx and compute now have independent command processors.
3916  *
3917  * GFX
3918  * Gfx consists of a single ring and can process both gfx jobs and
3919  * compute jobs.  The gfx CP consists of three microengines (ME):
3920  * PFP - Pre-Fetch Parser
3921  * ME - Micro Engine
3922  * CE - Constant Engine
3923  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3925  * used by the DE so that they can be loaded into cache in parallel
3926  * while the DE is processing state update packets.
3927  *
3928  * Compute
3929  * The compute CP consists of two microengines (ME):
3930  * MEC1 - Compute MicroEngine 1
3931  * MEC2 - Compute MicroEngine 2
3932  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3933  * The queues are exposed to userspace and are programmed directly
3934  * by the compute runtime.
3935  */
3936 /**
3937  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3938  *
3939  * @rdev: radeon_device pointer
3940  * @enable: enable or disable the MEs
3941  *
3942  * Halts or unhalts the gfx MEs.
3943  */
3944 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3945 {
3946         if (enable)
3947                 WREG32(CP_ME_CNTL, 0);
3948         else {
3949                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3950                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3951                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3952                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3953         }
3954         udelay(50);
3955 }
3956
3957 /**
3958  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3959  *
3960  * @rdev: radeon_device pointer
3961  *
3962  * Loads the gfx PFP, ME, and CE ucode.
3963  * Returns 0 for success, -EINVAL if the ucode is not available.
3964  */
3965 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3966 {
3967         int i;
3968
3969         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3970                 return -EINVAL;
3971
3972         cik_cp_gfx_enable(rdev, false);
3973
3974         if (rdev->new_fw) {
3975                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3976                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3977                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3978                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3979                 const struct gfx_firmware_header_v1_0 *me_hdr =
3980                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3981                 const __le32 *fw_data;
3982                 u32 fw_size;
3983
3984                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3985                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3986                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3987
3988                 /* PFP */
3989                 fw_data = (const __le32 *)
3990                         ((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3991                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3992                 WREG32(CP_PFP_UCODE_ADDR, 0);
3993                 for (i = 0; i < fw_size; i++)
3994                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3995                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3996
3997                 /* CE */
3998                 fw_data = (const __le32 *)
3999                         ((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4000                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4001                 WREG32(CP_CE_UCODE_ADDR, 0);
4002                 for (i = 0; i < fw_size; i++)
4003                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4004                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4005
4006                 /* ME */
4007                 fw_data = (const __be32 *)
4008                         ((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4009                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4010                 WREG32(CP_ME_RAM_WADDR, 0);
4011                 for (i = 0; i < fw_size; i++)
4012                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4013                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4014                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4015         } else {
4016                 const __be32 *fw_data;
4017
4018                 /* PFP */
4019                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4020                 WREG32(CP_PFP_UCODE_ADDR, 0);
4021                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4022                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4023                 WREG32(CP_PFP_UCODE_ADDR, 0);
4024
4025                 /* CE */
4026                 fw_data = (const __be32 *)rdev->ce_fw->data;
4027                 WREG32(CP_CE_UCODE_ADDR, 0);
4028                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4029                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4030                 WREG32(CP_CE_UCODE_ADDR, 0);
4031
4032                 /* ME */
4033                 fw_data = (const __be32 *)rdev->me_fw->data;
4034                 WREG32(CP_ME_RAM_WADDR, 0);
4035                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4036                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4037                 WREG32(CP_ME_RAM_WADDR, 0);
4038         }
4039
4040         return 0;
4041 }
4042
4043 /**
4044  * cik_cp_gfx_start - start the gfx ring
4045  *
4046  * @rdev: radeon_device pointer
4047  *
4048  * Enables the ring and loads the clear state context and other
4049  * packets required to init the ring.
4050  * Returns 0 for success, error for failure.
4051  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state stream plus 17 dwords of setup packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the generated clear-state table (clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4103
4104 /**
4105  * cik_cp_gfx_fini - stop the gfx ring
4106  *
4107  * @rdev: radeon_device pointer
4108  *
4109  * Stop the gfx ring and tear down the driver ring
4110  * info.
4111  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before tearing down the ring memory */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4117
4118 /**
4119  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4120  *
4121  * @rdev: radeon_device pointer
4122  *
4123  * Program the location and size of the gfx ring buffer
4124  * and test it to make sure it's working.
4125  * Returns 0 for success, error for failure.
4126  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers
	 * (RB_RPTR_WR_ENA is only set transiently for the reset and is
	 * cleared again by the final CP_RB0_CNTL write below) */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* with the ring usable again, re-expose all of VRAM for copies */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4193
4194 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4195                      struct radeon_ring *ring)
4196 {
4197         u32 rptr;
4198
4199         if (rdev->wb.enabled)
4200                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4201         else
4202                 rptr = RREG32(CP_RB0_RPTR);
4203
4204         return rptr;
4205 }
4206
4207 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4208                      struct radeon_ring *ring)
4209 {
4210         u32 wptr;
4211
4212         wptr = RREG32(CP_RB0_WPTR);
4213
4214         return wptr;
4215 }
4216
/* Publish the ring's new write pointer to the CP. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* dummy read-back; presumably flushes the posted MMIO write — keep */
	(void)RREG32(CP_RB0_WPTR);
}
4223
4224 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4225                          struct radeon_ring *ring)
4226 {
4227         u32 rptr;
4228
4229         if (rdev->wb.enabled) {
4230                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4231         } else {
4232                 mutex_lock(&rdev->srbm_mutex);
4233                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4234                 rptr = RREG32(CP_HQD_PQ_RPTR);
4235                 cik_srbm_select(rdev, 0, 0, 0, 0);
4236                 mutex_unlock(&rdev->srbm_mutex);
4237         }
4238
4239         return rptr;
4240 }
4241
4242 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4243                          struct radeon_ring *ring)
4244 {
4245         u32 wptr;
4246
4247         if (rdev->wb.enabled) {
4248                 /* XXX check if swapping is necessary on BE */
4249                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4250         } else {
4251                 mutex_lock(&rdev->srbm_mutex);
4252                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4253                 wptr = RREG32(CP_HQD_PQ_WPTR);
4254                 cik_srbm_select(rdev, 0, 0, 0, 0);
4255                 mutex_unlock(&rdev->srbm_mutex);
4256         }
4257
4258         return wptr;
4259 }
4260
/* Publish the compute ring's new write pointer: update the writeback
 * copy, then ring the queue's doorbell with the same value.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4268
/*
 * cik_compute_stop - dequeue an active HQD and clear its queue pointers
 *
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable()).  The
 * function selects the ring's me/pipe/queue via SRBM and restores the
 * default (0,0,0,0) selection before returning.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait up to usec_timeout microseconds for the queue to drain */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4293
4294 /**
4295  * cik_cp_compute_enable - enable/disable the compute CP MEs
4296  *
4297  * @rdev: radeon_device pointer
4298  * @enable: enable or disable the MEs
4299  *
4300  * Halts or unhalts the compute MEs.
4301  */
4302 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4303 {
4304         if (enable)
4305                 WREG32(CP_MEC_CNTL, 0);
4306         else {
4307                 /*
4308                  * To make hibernation reliable we need to clear compute ring
4309                  * configuration before halting the compute ring.
4310                  */
4311                 mutex_lock(&rdev->srbm_mutex);
4312                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4313                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4314                 mutex_unlock(&rdev->srbm_mutex);
4315
4316                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4317                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4318                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4319         }
4320         udelay(50);
4321 }
4322
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching the ucode registers */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware image: header describes offset and size */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			((const char *)rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only KAVERI has a second MEC (see cik_mec_init()) */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): rdev->mec2_fw is dereferenced without a
			 * NULL check; presumably new_fw implies mec2_fw loaded
			 * successfully for KAVERI — verify against
			 * cik_init_microcode().
			 */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				((const char *)rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware image: raw big-endian words, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - legacy path reuses the MEC1 image (mec_fw) */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4393
4394 /**
4395  * cik_cp_compute_start - start the compute queues
4396  *
4397  * @rdev: radeon_device pointer
4398  *
4399  * Enable the compute queues.
4400  * Returns 0 for success, error for failure.
4401  */
4402 static int cik_cp_compute_start(struct radeon_device *rdev)
4403 {
4404         cik_cp_compute_enable(rdev, true);
4405
4406         return 0;
4407 }
4408
4409 /**
4410  * cik_cp_compute_fini - stop the compute queues
4411  *
4412  * @rdev: radeon_device pointer
4413  *
4414  * Stop the compute queues and tear down the driver queue
4415  * info.
4416  */
4417 static void cik_cp_compute_fini(struct radeon_device *rdev)
4418 {
4419         int i, idx, r;
4420
4421         cik_cp_compute_enable(rdev, false);
4422
4423         for (i = 0; i < 2; i++) {
4424                 if (i == 0)
4425                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4426                 else
4427                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4428
4429                 if (rdev->ring[idx].mqd_obj) {
4430                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4431                         if (unlikely(r != 0))
4432                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4433
4434                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4435                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4436
4437                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4438                         rdev->ring[idx].mqd_obj = NULL;
4439                 }
4440         }
4441 }
4442
4443 static void cik_mec_fini(struct radeon_device *rdev)
4444 {
4445         int r;
4446
4447         if (rdev->mec.hpd_eop_obj) {
4448                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4449                 if (unlikely(r != 0))
4450                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4451                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4452                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4453
4454                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4455                 rdev->mec.hpd_eop_obj = NULL;
4456         }
4457 }
4458
4459 #define MEC_HPD_SIZE 2048
4460
4461 static int cik_mec_init(struct radeon_device *rdev)
4462 {
4463         int r;
4464         u32 *hpd;
4465
4466         /*
4467          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4468          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4469          */
4470         if (rdev->family == CHIP_KAVERI)
4471                 rdev->mec.num_mec = 2;
4472         else
4473                 rdev->mec.num_mec = 1;
4474         rdev->mec.num_pipe = 4;
4475         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4476
4477         if (rdev->mec.hpd_eop_obj == NULL) {
4478                 r = radeon_bo_create(rdev,
4479                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4480                                      PAGE_SIZE, true,
4481                                      RADEON_GEM_DOMAIN_GTT, 0, NULL,
4482                                      &rdev->mec.hpd_eop_obj);
4483                 if (r) {
4484                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4485                         return r;
4486                 }
4487         }
4488
4489         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4490         if (unlikely(r != 0)) {
4491                 cik_mec_fini(rdev);
4492                 return r;
4493         }
4494         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4495                           &rdev->mec.hpd_eop_gpu_addr);
4496         if (r) {
4497                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4498                 cik_mec_fini(rdev);
4499                 return r;
4500         }
4501         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4502         if (r) {
4503                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4504                 cik_mec_fini(rdev);
4505                 return r;
4506         }
4507
4508         /* clear memory.  Not sure if this is required or not */
4509         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4510
4511         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4512         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4513
4514         return 0;
4515 }
4516
/* CPU-side shadow of the CP HQD (hardware queue descriptor) registers
 * programmed for one compute queue.  Embedded in struct bonaire_mqd,
 * whose GPU address is written to CP_MQD_BASE_ADDR by
 * cik_cp_compute_resume() — do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4555
/* Memory queue descriptor (MQD) layout for Bonaire-class compute queues.
 * One instance is written into a pinned GTT buffer object per queue by
 * cik_cp_compute_resume(); its GPU address is handed to the CP via
 * CP_MQD_BASE_ADDR, so the layout must not change.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register shadow, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4583
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1, pipes 4-7 on MEC2 (KAVERI only) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe gets a double-sized slice of the bo allocated
		 * in cik_mec_init()
		 */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the per-queue MQD bo on first resume */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait up to usec_timeout microseconds for drain */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4828
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4834
/* Load the gfx and compute CP microcode.
 * Returns 0 on success, or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4848
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4854
4855 static int cik_cp_resume(struct radeon_device *rdev)
4856 {
4857         int r;
4858
4859         cik_enable_gui_idle_interrupt(rdev, false);
4860
4861         r = cik_cp_load_microcode(rdev);
4862         if (r)
4863                 return r;
4864
4865         r = cik_cp_gfx_resume(rdev);
4866         if (r)
4867                 return r;
4868         r = cik_cp_compute_resume(rdev);
4869         if (r)
4870                 return r;
4871
4872         cik_enable_gui_idle_interrupt(rdev, true);
4873
4874         return 0;
4875 }
4876
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used by
 * cik_gpu_soft_reset() to aid hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4916
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	/* any graphics-pipeline unit busy -> reset the gfx block */
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4997
4998 /**
4999  * cik_gpu_soft_reset - soft reset GPU
5000  *
5001  * @rdev: radeon_device pointer
5002  * @reset_mask: mask of which blocks to reset
5003  *
5004  * Soft reset the blocks specified in @reset_mask.
5005  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and the last VM protection fault for debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG (clockgating/powergating) before touching anything */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt whichever sdma engines are being reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before asserting the resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on discrete parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, settle, then deassert; the extra reads post the writes */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	/* restore memory access and re-dump status for comparison */
	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5128
/* GMCON register values saved around a pci config reset on IGP parts,
 * captured by kv_save_regs_for_reset() and written back by
 * kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5134
/*
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves the GMCON registers so they can be restored after the reset,
 * then masks off the reng-execute and stutter enables while the reset
 * is in progress.  Paired with kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* keep the reng from executing and disable stutter during reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5146
5147 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5148                                       struct kv_reset_save_regs *save)
5149 {
5150         int i;
5151
5152         WREG32(GMCON_PGFSM_WRITE, 0);
5153         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5154
5155         for (i = 0; i < 5; i++)
5156                 WREG32(GMCON_PGFSM_WRITE, 0);
5157
5158         WREG32(GMCON_PGFSM_WRITE, 0);
5159         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5160
5161         for (i = 0; i < 5; i++)
5162                 WREG32(GMCON_PGFSM_WRITE, 0);
5163
5164         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5165         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5166
5167         for (i = 0; i < 5; i++)
5168                 WREG32(GMCON_PGFSM_WRITE, 0);
5169
5170         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5171         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5172
5173         for (i = 0; i < 5; i++)
5174                 WREG32(GMCON_PGFSM_WRITE, 0);
5175
5176         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5177         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5178
5179         for (i = 0; i < 5; i++)
5180                 WREG32(GMCON_PGFSM_WRITE, 0);
5181
5182         WREG32(GMCON_PGFSM_WRITE, 0);
5183         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5184
5185         for (i = 0; i < 5; i++)
5186                 WREG32(GMCON_PGFSM_WRITE, 0);
5187
5188         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5189         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5190
5191         for (i = 0; i < 5; i++)
5192                 WREG32(GMCON_PGFSM_WRITE, 0);
5193
5194         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5195         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5196
5197         for (i = 0; i < 5; i++)
5198                 WREG32(GMCON_PGFSM_WRITE, 0);
5199
5200         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5201         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5202
5203         for (i = 0; i < 5; i++)
5204                 WREG32(GMCON_PGFSM_WRITE, 0);
5205
5206         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5207         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5208
5209         for (i = 0; i < 5; i++)
5210                 WREG32(GMCON_PGFSM_WRITE, 0);
5211
5212         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5213         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5214
5215         WREG32(GMCON_MISC3, save->gmcon_misc3);
5216         WREG32(GMCON_MISC, save->gmcon_misc);
5217         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5218 }
5219
/*
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines, quiesces the memory controller, then triggers a
 * full asic reset through pci config space.  On IGP parts the GMCON
 * registers are saved before and restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* on APUs, save GMCON state so it survives the config reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM (DragonFly: via the wrapped BSD pci device) */
	pci_disable_busmaster(rdev->pdev->dev.bsddev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all ones while the chip is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5282
5283 /**
5284  * cik_asic_reset - soft reset GPU
5285  *
5286  * @rdev: radeon_device pointer
5287  * @hard: force hard reset
5288  *
5289  * Look up which blocks are hung and attempt
5290  * to reset them.
5291  * Returns 0 for success.
5292  */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	/* a hard reset goes straight to the pci config reset path */
	if (hard) {
		cik_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* flag the engines as hung in the BIOS scratch registers */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset if still hung (and module param allows it) */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* everything idle again: clear the hung flag */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5323
5324 /**
5325  * cik_gfx_is_lockup - check if the 3D engine is locked up
5326  *
5327  * @rdev: radeon_device pointer
5328  * @ring: radeon_ring structure holding ring information
5329  *
5330  * Check if the 3D engine is locked up (CIK).
5331  * Returns true if the engine is locked, false if not.
5332  */
5333 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5334 {
5335         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5336
5337         if (!(reset_mask & (RADEON_RESET_GFX |
5338                             RADEON_RESET_COMPUTE |
5339                             RADEON_RESET_CP))) {
5340                 radeon_ring_lockup_update(rdev, ring);
5341                 return false;
5342         }
5343         return radeon_ring_test_lockup(rdev, ring);
5344 }
5345
5346 /* MC */
5347 /**
5348  * cik_mc_program - program the GPU memory controller
5349  *
5350  * @rdev: radeon_device pointer
5351  *
5352  * Set the location of vram, gart, and AGP in the GPU's
5353  * physical address space (CIK).
5354  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: zero 32 register groups at stride 0x18 starting
	 * at 0x2c14 (raw offsets, no symbolic names in cikd.h) */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic while reprogramming the aperture */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration: system aperture covers vram, with the
	 * scratch page as the default (fault) address */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location packs start/end in 16MB units (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on CIK: base 0, top below bottom disables the aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5402
5403 /**
5404  * cik_mc_init - initialize the memory controller driver params
5405  *
5406  * @rdev: radeon_device pointer
5407  *
5408  * Look up the amount of vram, vram width, and decide how to place
5409  * vram and gart within the GPU's physical address space (CIK).
5410  * Returns 0 for success.
5411  */
5412 static int cik_mc_init(struct radeon_device *rdev)
5413 {
5414         u32 tmp;
5415         int chansize, numchan;
5416
5417         /* Get VRAM informations */
5418         rdev->mc.vram_is_ddr = true;
5419         tmp = RREG32(MC_ARB_RAMCFG);
5420         if (tmp & CHANSIZE_MASK) {
5421                 chansize = 64;
5422         } else {
5423                 chansize = 32;
5424         }
5425         tmp = RREG32(MC_SHARED_CHMAP);
5426         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5427         case 0:
5428         default:
5429                 numchan = 1;
5430                 break;
5431         case 1:
5432                 numchan = 2;
5433                 break;
5434         case 2:
5435                 numchan = 4;
5436                 break;
5437         case 3:
5438                 numchan = 8;
5439                 break;
5440         case 4:
5441                 numchan = 3;
5442                 break;
5443         case 5:
5444                 numchan = 6;
5445                 break;
5446         case 6:
5447                 numchan = 10;
5448                 break;
5449         case 7:
5450                 numchan = 12;
5451                 break;
5452         case 8:
5453                 numchan = 16;
5454                 break;
5455         }
5456         rdev->mc.vram_width = numchan * chansize;
5457         /* Could aper size report 0 ? */
5458         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5459         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5460         /* size in MB on si */
5461         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5462         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5463         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5464         si_vram_gtt_location(rdev, &rdev->mc);
5465         radeon_update_bandwidth_info(rdev);
5466
5467         return 0;
5468 }
5469
5470 /*
5471  * GART
5472  * VMID 0 is the physical GPU addresses as used by the kernel.
5473  * VMIDs 1-15 are used for userspace clients and are handled
5474  * by the radeon vm/hsa code.
5475  */
5476 /**
5477  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5478  *
5479  * @rdev: radeon_device pointer
5480  *
5481  * Flush the TLB for the VMID 0 page table (CIK).
5482  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache first so page table updates are visible */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 (the
	 * kernel GART context) is invalidated here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5491
/* Program the SH_MEM_* registers for the compute VMIDs (8-15):
 * non-zero SH_MEM_BASES, unaligned access mode and noncached
 * default mtype, with the APE1 aperture disabled (base > limit).
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* same 0x6000 base in both the shared and private fields */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* srbm_mutex serializes all users of cik_srbm_select() */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* switch back to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5513
5514 /**
5515  * cik_pcie_gart_enable - gart enable
5516  *
5517  * @rdev: radeon_device pointer
5518  *
5519  * This sets up the TLBs, programs the page tables for VMID0,
5520  * sets up the hw for VMIDs 1-15 which are allocated on
5521  * demand, and sets up the global locations for the LDS, GDS,
5522  * and GPUVM for FSA64 clients (CIK).
5523  * Returns 0 for success, errors for failure.
5524  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel GART mapping, faults fall back to
	 * the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw register offsets, no symbolic names visible */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 with full protection fault reporting */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* Kaveri routes through the VM rather than bypassing it */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5636
5637 /**
5638  * cik_pcie_gart_disable - gart disable
5639  *
5640  * @rdev: radeon_device pointer
5641  *
5642  * This disables all VM page table (CIK).
5643  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-context page table base addresses so
	 * cik_pcie_gart_enable() can restore them after e.g. resume;
	 * contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control (L1 TLB left disabled) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (ENABLE_L2_CACHE deliberately not set) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5675
5676 /**
5677  * cik_pcie_gart_fini - vm fini callback
5678  *
5679  * @rdev: radeon_device pointer
5680  *
5681  * Tears down the driver GART/VM setup (CIK).
5682  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first, then free the table and manager state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5689
5690 /* vm parser */
5691 /**
5692  * cik_ib_parse - vm ib_parse callback
5693  *
5694  * @rdev: radeon_device pointer
5695  * @ib: indirect buffer pointer
5696  *
5697  * CIK uses hw IB checking so this is a nop (CIK).
5698  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* IB validation is done by the hardware on CIK; nothing to do */
	return 0;
}
5703
5704 /*
5705  * vm
5706  * VMID 0 is the physical GPU addresses as used by the kernel.
5707  * VMIDs 1-15 are used for userspace clients and are handled
5708  * by the radeon vm/hsa code.
5709  */
5710 /**
5711  * cik_vm_init - cik vm init callback
5712  *
5713  * @rdev: radeon_device pointer
5714  *
5715  * Inits cik specific vm parameters (number of VMs, base of vram for
5716  * VMIDs 1-15) (CIK).
5717  * Returns 0 for success.
5718  */
5719 int cik_vm_init(struct radeon_device *rdev)
5720 {
5721         /* number of VMs */
5722         rdev->vm_manager.nvm = 16;
5723         /* base offset of vram pages */
5724         if (rdev->flags & RADEON_IS_IGP) {
5725                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5726                 tmp <<= 22;
5727                 rdev->vm_manager.vram_base_offset = tmp;
5728         } else
5729                 rdev->vm_manager.vram_base_offset = 0;
5730
5731         return 0;
5732 }
5733
5734 /**
5735  * cik_vm_fini - cik vm fini callback
5736  *
5737  * @rdev: radeon_device pointer
5738  *
5739  * Tear down any asic specific VM setup (CIK).
5740  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down on CIK */
}
5744
5745 /**
5746  * cik_vm_decode_fault - print human readable fault info
5747  *
5748  * @rdev: radeon_device pointer
5749  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5750  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5751  *
5752  * Print human readable fault information (CIK).
5753  */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client packs a four character code; unpack it into a
	 * NUL terminated string, most significant byte first */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a different (wider) client id mask */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5773
5774 /**
5775  * cik_vm_flush - cik vm flush using the CP
5776  *
5777  * @rdev: radeon_device pointer
5778  *
5779  * Update the page table base and flush the VM TLB
5780  * using the CP (CIK).
5781  */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* the gfx ring can write via the PFP; compute rings use the ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base address for this vm_id; contexts
	 * 0-7 and 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs: first select the target VMID via SRBM */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* write the four consecutive SH_MEM registers in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5855
5856 /*
5857  * RLC
5858  * The RLC is a multi-purpose microengine that handles a
5859  * variety of functions, the most important of which is
5860  * the interrupt controller.
5861  */
5862 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5863                                           bool enable)
5864 {
5865         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5866
5867         if (enable)
5868                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5869         else
5870                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5871         WREG32(CP_INT_CNTL_RING0, tmp);
5872 }
5873
5874 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5875 {
5876         u32 tmp;
5877
5878         tmp = RREG32(RLC_LB_CNTL);
5879         if (enable)
5880                 tmp |= LOAD_BALANCE_ENABLE;
5881         else
5882                 tmp &= ~LOAD_BALANCE_ENABLE;
5883         WREG32(RLC_LB_CNTL, tmp);
5884 }
5885
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes units to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls the per-CU master busy register for every SE/SH combination,
 * then the non-CU master busy register, each bounded by
 * rdev->usec_timeout iterations of udelay(1) (CIK).
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
        u32 i, j, k;
        u32 mask;

        /* grbm_idx_mutex serializes GRBM SE/SH index selection */
        mutex_lock(&rdev->grbm_idx_mutex);
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        for (k = 0; k < rdev->usec_timeout; k++) {
                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* restore broadcast mode before dropping the lock */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* now wait for the non-CU masters (SE/GC/TC) to drain */
        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
        for (k = 0; k < rdev->usec_timeout; k++) {
                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
5912
5913 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5914 {
5915         u32 tmp;
5916
5917         tmp = RREG32(RLC_CNTL);
5918         if (tmp != rlc)
5919                 WREG32(RLC_CNTL, rlc);
5920 }
5921
/**
 * cik_halt_rlc - stop the RLC if it is currently running
 *
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE, waits (bounded) for the RLC GPM to go idle and for
 * the serdes units to drain.  Returns the original RLC_CNTL value so
 * the caller can restore it later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
        u32 data, orig;

        orig = data = RREG32(RLC_CNTL);

        if (data & RLC_ENABLE) {
                u32 i;

                data &= ~RLC_ENABLE;
                WREG32(RLC_CNTL, data);

                /* bounded poll for GPM idle */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
                                break;
                        udelay(1);
                }

                cik_wait_for_rlc_serdes(rdev);
        }

        return orig;
}
5945
/**
 * cik_enter_rlc_safe_mode - request that the RLC enter safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE through RLC_GPR_REG2, then polls until
 * the GFX power/clock status bits are both set and the REQ bit has been
 * cleared (acked), each bounded by rdev->usec_timeout microseconds.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
        u32 tmp, i, mask;

        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
        WREG32(RLC_GPR_REG2, tmp);

        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
                        break;
                udelay(1);
        }

        /* the RLC clears REQ once it has consumed the message */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
                        break;
                udelay(1);
        }
}
5966
5967 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5968 {
5969         u32 tmp;
5970
5971         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5972         WREG32(RLC_GPR_REG2, tmp);
5973 }
5974
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
        /* clear RLC_ENABLE (and every other control bit) */
        WREG32(RLC_CNTL, 0);

        cik_enable_gui_idle_interrupt(rdev, false);

        /* let the serdes units drain before returning */
        cik_wait_for_rlc_serdes(rdev);
}
5990
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, RLC_ENABLE);

        cik_enable_gui_idle_interrupt(rdev, true);

        /* brief settle time after enabling the RLC */
        udelay(50);
}
6006
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
        u32 i, size, tmp;

        if (!rdev->rlc_fw)
                return -EINVAL;

        cik_rlc_stop(rdev);

        /* disable CG */
        tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
        WREG32(RLC_CGCG_CGLS_CTRL, tmp);

        si_rlc_reset(rdev);

        cik_init_pg(rdev);

        cik_init_cg(rdev);

        /* load-balancing counter setup */
        WREG32(RLC_LB_CNTR_INIT, 0);
        WREG32(RLC_LB_CNTR_MAX, 0x00008000);

        /* program LB params with SE/SH broadcast selected */
        mutex_lock(&rdev->grbm_idx_mutex);
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
        WREG32(RLC_LB_PARAMS, 0x00600408);
        WREG32(RLC_LB_CNTL, 0x80000004);
        mutex_unlock(&rdev->grbm_idx_mutex);

        WREG32(RLC_MC_CNTL, 0);
        WREG32(RLC_UCODE_CNTL, 0);

        if (rdev->new_fw) {
                /* new-style firmware: little-endian payload, geometry from header */
                const struct rlc_firmware_header_v1_0 *hdr =
                        (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
                const __le32 *fw_data = (const __le32 *)
                        ((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

                radeon_ucode_print_rlc_hdr(&hdr->header);

                size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
                WREG32(RLC_GPM_UCODE_ADDR, 0);
                for (i = 0; i < size; i++)
                        WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
                WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
        } else {
                /* legacy firmware: big-endian payload, fixed per-family sizes */
                const __be32 *fw_data;

                switch (rdev->family) {
                case CHIP_BONAIRE:
                case CHIP_HAWAII:
                default:
                        size = BONAIRE_RLC_UCODE_SIZE;
                        break;
                case CHIP_KAVERI:
                        size = KV_RLC_UCODE_SIZE;
                        break;
                case CHIP_KABINI:
                        size = KB_RLC_UCODE_SIZE;
                        break;
                case CHIP_MULLINS:
                        size = ML_RLC_UCODE_SIZE;
                        break;
                }

                fw_data = (const __be32 *)rdev->rlc_fw->data;
                WREG32(RLC_GPM_UCODE_ADDR, 0);
                for (i = 0; i < size; i++)
                        WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
                WREG32(RLC_GPM_UCODE_ADDR, 0);
        }

        /* XXX - find out what chips support lbpw */
        cik_enable_lbpw(rdev, false);

        if (rdev->family == CHIP_BONAIRE)
                WREG32(RLC_DRIVER_DMA_STATUS, 0);

        cik_rlc_start(rdev);

        return 0;
}
6098
/**
 * cik_enable_cgcg - enable/disable coarse-grain clock gating (CGCG/CGLS)
 *
 * @rdev: radeon_device pointer
 * @enable: enable (subject to RADEON_CG_SUPPORT_GFX_CGCG) or disable
 *
 * When enabling, the RLC is halted while the serdes override masks are
 * programmed, then restored; the CGCG/CGLS enable bits are committed
 * to RLC_CGCG_CGLS_CTRL only if they changed.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                /* tmp holds the pre-halt RLC_CNTL value for the restore below */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* four back-to-back reads; presumably posting reads to let
                 * the CB clock-gating state settle -- matches upstream */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6136
/**
 * cik_enable_mgcg - enable/disable medium-grain clock gating (MGCG)
 *
 * @rdev: radeon_device pointer
 * @enable: enable (subject to RADEON_CG_SUPPORT_GFX_MGCG) or disable
 *
 * Programs the CP/RLC memory light-sleep bits, the MGCG override in the
 * RLC, the serdes write masks (with the RLC halted) and the CGTS shader
 * clock-gating controls.  The enable and disable paths program the
 * registers in opposite orders; do not reorder.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* tmp holds the pre-halt RLC_CNTL value for the restore below */
                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        /* shader clock-gating (CGTS) setup */
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force both override bits on to disable MGCG */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6220
/* Memory-controller registers that each carry clock-gating (MC_CG_ENABLE)
 * and light-sleep (MC_LS_ENABLE) control bits; they are toggled as a set. */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6233
6234 static void cik_enable_mc_ls(struct radeon_device *rdev,
6235                              bool enable)
6236 {
6237         int i;
6238         u32 orig, data;
6239
6240         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6241                 orig = data = RREG32(mc_cg_registers[i]);
6242                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6243                         data |= MC_LS_ENABLE;
6244                 else
6245                         data &= ~MC_LS_ENABLE;
6246                 if (data != orig)
6247                         WREG32(mc_cg_registers[i], data);
6248         }
6249 }
6250
6251 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6252                                bool enable)
6253 {
6254         int i;
6255         u32 orig, data;
6256
6257         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6258                 orig = data = RREG32(mc_cg_registers[i]);
6259                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6260                         data |= MC_CG_ENABLE;
6261                 else
6262                         data &= ~MC_CG_ENABLE;
6263                 if (data != orig)
6264                         WREG32(mc_cg_registers[i], data);
6265         }
6266 }
6267
6268 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6269                                  bool enable)
6270 {
6271         u32 orig, data;
6272
6273         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6274                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6275                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6276         } else {
6277                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6278                 data |= 0xff000000;
6279                 if (data != orig)
6280                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6281
6282                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6283                 data |= 0xff000000;
6284                 if (data != orig)
6285                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6286         }
6287 }
6288
6289 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6290                                  bool enable)
6291 {
6292         u32 orig, data;
6293
6294         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6295                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6296                 data |= 0x100;
6297                 if (orig != data)
6298                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6299
6300                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6301                 data |= 0x100;
6302                 if (orig != data)
6303                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6304         } else {
6305                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6306                 data &= ~0x100;
6307                 if (orig != data)
6308                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6309
6310                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6311                 data &= ~0x100;
6312                 if (orig != data)
6313                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6314         }
6315 }
6316
/* Enable/disable UVD medium-grain clock gating: program the UVD memory
 * clock-gating mask (UVD_CGC_MEM_CTRL) and the DCM bit in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                /* NOTE(review): the read result is immediately overwritten
                 * with 0xfff; presumably the read itself matters (posting
                 * read) or this matches upstream as-is -- verify against
                 * Linux cik.c before "fixing" */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6342
6343 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6344                                bool enable)
6345 {
6346         u32 orig, data;
6347
6348         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6349
6350         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6351                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6352                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6353         else
6354                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6355                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6356
6357         if (orig != data)
6358                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6359 }
6360
6361 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6362                                 bool enable)
6363 {
6364         u32 orig, data;
6365
6366         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6367
6368         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6369                 data &= ~CLOCK_GATING_DIS;
6370         else
6371                 data |= CLOCK_GATING_DIS;
6372
6373         if (orig != data)
6374                 WREG32(HDP_HOST_PATH_CNTL, data);
6375 }
6376
6377 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6378                               bool enable)
6379 {
6380         u32 orig, data;
6381
6382         orig = data = RREG32(HDP_MEM_POWER_LS);
6383
6384         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6385                 data |= HDP_LS_ENABLE;
6386         else
6387                 data &= ~HDP_LS_ENABLE;
6388
6389         if (orig != data)
6390                 WREG32(HDP_MEM_POWER_LS, data);
6391 }
6392
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* values to update
 * @enable: enable or disable clock gating for those blocks
 *
 * Dispatches to the per-block helpers.  For GFX, MGCG must be enabled
 * before CGCG and disabled after it; the GUI idle interrupt is masked
 * for the duration of the GFX reprogramming.  MC gating is skipped on
 * IGPs (no discrete memory controller).
 */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_VCE) {
                vce_v2_0_enable_mgcg(rdev, enable);
        }
}
6440
/* Enable clock gating at init: GFX first, then UVD internal CG (if the
 * chip has UVD), then the remaining blocks in one batch. */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
6455
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6466
6467 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6468                                           bool enable)
6469 {
6470         u32 data, orig;
6471
6472         orig = data = RREG32(RLC_PG_CNTL);
6473         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6474                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6475         else
6476                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6477         if (orig != data)
6478                 WREG32(RLC_PG_CNTL, data);
6479 }
6480
6481 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6482                                           bool enable)
6483 {
6484         u32 data, orig;
6485
6486         orig = data = RREG32(RLC_PG_CNTL);
6487         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6488                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6489         else
6490                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6491         if (orig != data)
6492                 WREG32(RLC_PG_CNTL, data);
6493 }
6494
6495 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6496 {
6497         u32 data, orig;
6498
6499         orig = data = RREG32(RLC_PG_CNTL);
6500         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6501                 data &= ~DISABLE_CP_PG;
6502         else
6503                 data |= DISABLE_CP_PG;
6504         if (orig != data)
6505                 WREG32(RLC_PG_CNTL, data);
6506 }
6507
6508 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6509 {
6510         u32 data, orig;
6511
6512         orig = data = RREG32(RLC_PG_CNTL);
6513         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6514                 data &= ~DISABLE_GDS_PG;
6515         else
6516                 data |= DISABLE_GDS_PG;
6517         if (orig != data)
6518                 WREG32(RLC_PG_CNTL, data);
6519 }
6520
/* Layout of the CP jump tables inside the legacy (non-new_fw) firmware
 * images: each table is CP_ME_TABLE_SIZE dwords long, at the given
 * dword offset within the blob (MEC uses its own offset). */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6524
/**
 * cik_init_cp_pg_table - populate the RLC's CP jump-table buffer
 *
 * @rdev: radeon_device pointer
 *
 * Copies the CE/PFP/ME/MEC (plus a fifth table on Kaveri) jump tables
 * out of the CP firmware images into the cp_table BO, back to back.
 * New-style firmware carries the table offset/size in its header;
 * legacy firmware uses the fixed CP_ME_TABLE_* layout.  No-op when the
 * cp_table BO has not been mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        /* little-endian payload; geometry from the header */
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        ((const char *)rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        ((const char *)rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        ((const char *)rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        ((const char *)rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                /* me == 4 only on Kaveri; assumes mec2_fw was
                                 * loaded whenever new_fw is set -- TODO confirm */
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        ((const char *)rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy firmware: big-endian payload, fixed layout */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6608
/* Enable/disable GFX power gating: the GFX_PG_ENABLE bit in RLC_PG_CNTL
 * plus automatic power gating (AUTO_PG_EN) in RLC_AUTO_PG_CTRL. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): result discarded; presumably a posting read
                 * to flush the disables -- matches upstream, verify */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6638
/**
 * cik_get_cu_active_bitmap - get the bitmap of active CUs for a SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index within the SE
 *
 * Reads the fused (CC_GC_SHADER_ARRAY_CONFIG) and user-disabled
 * (GC_USER_SHADER_ARRAY_CONFIG) CU config for the selected SE/SH under
 * grbm_idx_mutex, then restores broadcast indexing.  The inactive-CU
 * bits live in the upper 16 bits of both registers; they are merged,
 * shifted down, inverted and masked to max_cu_per_sh, so a set bit in
 * the result means the corresponding CU is active.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
        u32 mask = 0, tmp, tmp1;
        int i;

        /* GRBM index selection is global state; serialize it */
        mutex_lock(&rdev->grbm_idx_mutex);
        cik_select_se_sh(rdev, se, sh);
        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
        /* 0xffffffff/0xffffffff restores broadcast to all SE/SH */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        /* keep only the inactive-CU field (upper 16 bits) */
        tmp &= 0xffff0000;

        tmp |= tmp1;
        tmp >>= 16;

        /* build a mask with one bit per possible CU in this SH */
        for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
                mask <<= 1;
                mask |= 1;
        }

        /* invert: registers mark disabled CUs, caller wants active ones */
        return (~tmp) & mask;
}
6663
6664 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6665 {
6666         u32 i, j, k, active_cu_number = 0;
6667         u32 mask, counter, cu_bitmap;
6668         u32 tmp = 0;
6669
6670         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6671                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6672                         mask = 1;
6673                         cu_bitmap = 0;
6674                         counter = 0;
6675                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6676                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6677                                         if (counter < 2)
6678                                                 cu_bitmap |= mask;
6679                                         counter ++;
6680                                 }
6681                                 mask <<= 1;
6682                         }
6683
6684                         active_cu_number += counter;
6685                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6686                 }
6687         }
6688
6689         WREG32(RLC_PG_AO_CU_MASK, tmp);
6690
6691         tmp = RREG32(RLC_MAX_PG_CU);
6692         tmp &= ~MAX_PU_CU_MASK;
6693         tmp |= MAX_PU_CU(active_cu_number);
6694         WREG32(RLC_MAX_PG_CU, tmp);
6695 }
6696
6697 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6698                                        bool enable)
6699 {
6700         u32 data, orig;
6701
6702         orig = data = RREG32(RLC_PG_CNTL);
6703         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6704                 data |= STATIC_PER_CU_PG_ENABLE;
6705         else
6706                 data &= ~STATIC_PER_CU_PG_ENABLE;
6707         if (orig != data)
6708                 WREG32(RLC_PG_CNTL, data);
6709 }
6710
6711 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6712                                         bool enable)
6713 {
6714         u32 data, orig;
6715
6716         orig = data = RREG32(RLC_PG_CNTL);
6717         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6718                 data |= DYN_PER_CU_PG_ENABLE;
6719         else
6720                 data &= ~DYN_PER_CU_PG_ENABLE;
6721         if (orig != data)
6722                 WREG32(RLC_PG_CNTL, data);
6723 }
6724
/* Offsets into the RLC GPM scratch area used for power gating state */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/**
 * cik_init_gfx_cgpg - set up GFX power gating state in the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor (GPU address + size, or zeros if no
 * cs_data is available) and the save/restore register list into RLC GPM
 * scratch, points the RLC at the save/restore and CP table buffers, and
 * programs PG timing parameters (idle poll count, PG delays, GRBM
 * save-gate idle threshold).
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* clear-state descriptor: hi addr, lo addr, size */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear state buffer: zero out the descriptor */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                /* upload the save/restore register list */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        /* both registers take 256-byte-aligned GPU addresses (>> 8) */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        /* low byte of RLC_PG_DELAY_2 set to 3; other bits preserved */
        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
6776
/**
 * cik_update_gfx_pg - enable/disable all GFX power gating modes
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Toggles coarse-grain, static per-CU and dynamic per-CU gating
 * together; each helper internally checks its own pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6783
6784 u32 cik_get_csb_size(struct radeon_device *rdev)
6785 {
6786         u32 count = 0;
6787         const struct cs_section_def *sect = NULL;
6788         const struct cs_extent_def *ext = NULL;
6789
6790         if (rdev->rlc.cs_data == NULL)
6791                 return 0;
6792
6793         /* begin clear state */
6794         count += 2;
6795         /* context control state */
6796         count += 3;
6797
6798         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6799                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6800                         if (sect->id == SECT_CONTEXT)
6801                                 count += 2 + ext->reg_count;
6802                         else
6803                                 return 0;
6804                 }
6805         }
6806         /* pa_sc_raster_config/pa_sc_raster_config1 */
6807         count += 4;
6808         /* end clear state */
6809         count += 2;
6810         /* clear state */
6811         count += 2;
6812
6813         return count;
6814 }
6815
/**
 * cik_get_csb_buffer - build the clear state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the PM4 packet stream that initializes the context registers to
 * their golden values: PREAMBLE begin-clear-state, CONTEXT_CONTROL, the
 * SET_CONTEXT_REG extents from rdev->rlc.cs_data, the per-ASIC
 * pa_sc_raster_config values, PREAMBLE end-clear-state and a final
 * CLEAR_STATE packet.  The dword count must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                /* 0xa000: context register space base (dword offset) */
                                buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                /* non-context sections are not supported;
                                 * cik_get_csb_size() returns 0 for them too */
                                return;
                        }
                }
        }

        /* pa_sc_raster_config/pa_sc_raster_config1, per ASIC */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (rdev->family) {
        case CHIP_BONAIRE:
                buffer[count++] = cpu_to_le32(0x16000012);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_KAVERI:
                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_KABINI:
        case CHIP_MULLINS:
                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_HAWAII:
                buffer[count++] = cpu_to_le32(0x3a00161a);
                buffer[count++] = cpu_to_le32(0x0000002e);
                break;
        default:
                buffer[count++] = cpu_to_le32(0x00000000);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
6880
6881 static void cik_init_pg(struct radeon_device *rdev)
6882 {
6883         if (rdev->pg_flags) {
6884                 cik_enable_sck_slowdown_on_pu(rdev, true);
6885                 cik_enable_sck_slowdown_on_pd(rdev, true);
6886                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6887                         cik_init_gfx_cgpg(rdev);
6888                         cik_enable_cp_pg(rdev, true);
6889                         cik_enable_gds_pg(rdev, true);
6890                 }
6891                 cik_init_ao_cu_mask(rdev);
6892                 cik_update_gfx_pg(rdev, true);
6893         }
6894 }
6895
6896 static void cik_fini_pg(struct radeon_device *rdev)
6897 {
6898         if (rdev->pg_flags) {
6899                 cik_update_gfx_pg(rdev, false);
6900                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6901                         cik_enable_cp_pg(rdev, false);
6902                         cik_enable_gds_pg(rdev, false);
6903                 }
6904         }
6905 }
6906
6907 /*
6908  * Interrupts
6909  * Starting with r6xx, interrupts are handled via a ring buffer.
6910  * Ring buffers are areas of GPU accessible memory that the GPU
6911  * writes interrupt vectors into and the host reads vectors out of.
6912  * There is a rptr (read pointer) that determines where the
6913  * host is currently reading, and a wptr (write pointer)
6914  * which determines where the GPU has written.  When the
6915  * pointers are equal, the ring is idle.  When the GPU
6916  * writes vectors to the ring buffer, it increments the
6917  * wptr.  When there is an interrupt, the host then starts
6918  * fetching commands and processing them until the pointers are
6919  * equal again at which point it updates the rptr.
6920  */
6921
6922 /**
6923  * cik_enable_interrupts - Enable the interrupt ring buffer
6924  *
6925  * @rdev: radeon_device pointer
6926  *
6927  * Enable the interrupt ring buffer (CIK).
6928  */
6929 static void cik_enable_interrupts(struct radeon_device *rdev)
6930 {
6931         u32 ih_cntl = RREG32(IH_CNTL);
6932         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6933
6934         ih_cntl |= ENABLE_INTR;
6935         ih_rb_cntl |= IH_RB_ENABLE;
6936         WREG32(IH_CNTL, ih_cntl);
6937         WREG32(IH_RB_CNTL, ih_rb_cntl);
6938         rdev->ih.enabled = true;
6939 }
6940
6941 /**
6942  * cik_disable_interrupts - Disable the interrupt ring buffer
6943  *
6944  * @rdev: radeon_device pointer
6945  *
6946  * Disable the interrupt ring buffer (CIK).
6947  */
6948 static void cik_disable_interrupts(struct radeon_device *rdev)
6949 {
6950         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6951         u32 ih_cntl = RREG32(IH_CNTL);
6952
6953         ih_rb_cntl &= ~IH_RB_ENABLE;
6954         ih_cntl &= ~ENABLE_INTR;
6955         WREG32(IH_RB_CNTL, ih_rb_cntl);
6956         WREG32(IH_CNTL, ih_cntl);
6957         /* set rptr, wptr to 0 */
6958         WREG32(IH_RB_RPTR, 0);
6959         WREG32(IH_RB_WPTR, 0);
6960         rdev->ih.enabled = false;
6961         rdev->ih.rptr = 0;
6962 }
6963
6964 /**
6965  * cik_disable_interrupt_state - Disable all interrupt sources
6966  *
6967  * @rdev: radeon_device pointer
6968  *
6969  * Clear all interrupt enable bits used by the driver (CIK).
6970  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
        u32 tmp;

        /* gfx ring: keep only the context busy/empty bits, drop the rest */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        /* sdma: clear the trap enable on both engines */
        tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* compute queues: all pipes of both compute MEs */
        WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
        WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
        /* grbm */
        WREG32(GRBM_INT_CNTL, 0);
        /* SRBM */
        WREG32(SRBM_INT_CNTL, 0);
        /* vline/vblank, etc. */
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
        WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        if (rdev->num_crtc >= 4) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }
        /* pflip */
        if (rdev->num_crtc >= 2) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 4) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
        }
        if (rdev->num_crtc >= 6) {
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
                WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
        }

        /* dac hotplug */
        WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

        /* digital hotplug: keep the configured polarity, drop the enables */
        tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD1_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD2_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD3_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD4_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD5_INT_CONTROL, tmp);
        tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
        WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7040
7041 /**
7042  * cik_irq_init - init and enable the interrupt ring
7043  *
7044  * @rdev: radeon_device pointer
7045  *
7046  * Allocate a ring buffer for the interrupt controller,
7047  * enable the RLC, disable interrupts, enable the IH
7048  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7050  * Returns 0 for success, errors for failure.
7051  */
static int cik_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        cik_disable_interrupts(rdev);

        /* init rlc; the IH depends on a running RLC, so failure here
         * tears down the just-allocated ring */
        ret = cik_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* XXX this should actually be a bus address, not an MC address. same on older asics */
        WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        /* ring base is 256-byte aligned (>> 8) */
        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        /* ring size field is log2 of the size in dwords */
        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        cik_disable_interrupt_state(rdev);

        /* DragonFly: enable PCI bus mastering via the BSD device handle */
        pci_enable_busmaster(rdev->pdev->dev.bsddev);

        /* enable irqs */
        cik_enable_interrupts(rdev);

        return ret;
}
7122
7123 /**
7124  * cik_irq_set - enable/disable interrupt sources
7125  *
7126  * @rdev: radeon_device pointer
7127  *
7128  * Enable interrupt sources on the GPU (vblanks, hpd,
7129  * etc.) (CIK).
7130  * Returns 0 for success, errors for failure.
7131  */
7132 int cik_irq_set(struct radeon_device *rdev)
7133 {
7134         u32 cp_int_cntl;
7135         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7136         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7137         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7138         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7139         u32 grbm_int_cntl = 0;
7140         u32 dma_cntl, dma_cntl1;
7141
7142         if (!rdev->irq.installed) {
7143                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7144                 return -EINVAL;
7145         }
7146         /* don't enable anything if the ih is disabled */
7147         if (!rdev->ih.enabled) {
7148                 cik_disable_interrupts(rdev);
7149                 /* force the active interrupt state to all disabled */
7150                 cik_disable_interrupt_state(rdev);
7151                 return 0;
7152         }
7153
7154         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7155                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7156         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7157
7158         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7159         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7160         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7161         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7162         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7163         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7164
7165         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7166         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7167
7168         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7169         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7170         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7171         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7172         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7173         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7174         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7175         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7176
7177         /* enable CP interrupts on all rings */
7178         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7179                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7180                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7181         }
7182         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7183                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7184                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7185                 if (ring->me == 1) {
7186                         switch (ring->pipe) {
7187                         case 0:
7188                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7189                                 break;
7190                         case 1:
7191                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7192                                 break;
7193                         case 2:
7194                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7195                                 break;
7196                         case 3:
7197                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7198                                 break;
7199                         default:
7200                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7201                                 break;
7202                         }
7203                 } else if (ring->me == 2) {
7204                         switch (ring->pipe) {
7205                         case 0:
7206                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7207                                 break;
7208                         case 1:
7209                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7210                                 break;
7211                         case 2:
7212                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7213                                 break;
7214                         case 3:
7215                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7216                                 break;
7217                         default:
7218                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7219                                 break;
7220                         }
7221                 } else {
7222                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7223                 }
7224         }
7225         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7226                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7227                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7228                 if (ring->me == 1) {
7229                         switch (ring->pipe) {
7230                         case 0:
7231                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7232                                 break;
7233                         case 1:
7234                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7235                                 break;
7236                         case 2:
7237                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7238                                 break;
7239                         case 3:
7240                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7241                                 break;
7242                         default:
7243                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7244                                 break;
7245                         }
7246                 } else if (ring->me == 2) {
7247                         switch (ring->pipe) {
7248                         case 0:
7249                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7250                                 break;
7251                         case 1:
7252                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7253                                 break;
7254                         case 2:
7255                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7256                                 break;
7257                         case 3:
7258                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7259                                 break;
7260                         default:
7261                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7262                                 break;
7263                         }
7264                 } else {
7265                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7266                 }
7267         }
7268
7269         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7270                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7271                 dma_cntl |= TRAP_ENABLE;
7272         }
7273
7274         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7275                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7276                 dma_cntl1 |= TRAP_ENABLE;
7277         }
7278
7279         if (rdev->irq.crtc_vblank_int[0] ||
7280             atomic_read(&rdev->irq.pflip[0])) {
7281                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7282                 crtc1 |= VBLANK_INTERRUPT_MASK;
7283         }
7284         if (rdev->irq.crtc_vblank_int[1] ||
7285             atomic_read(&rdev->irq.pflip[1])) {
7286                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7287                 crtc2 |= VBLANK_INTERRUPT_MASK;
7288         }
7289         if (rdev->irq.crtc_vblank_int[2] ||
7290             atomic_read(&rdev->irq.pflip[2])) {
7291                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7292                 crtc3 |= VBLANK_INTERRUPT_MASK;
7293         }
7294         if (rdev->irq.crtc_vblank_int[3] ||
7295             atomic_read(&rdev->irq.pflip[3])) {
7296                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7297                 crtc4 |= VBLANK_INTERRUPT_MASK;
7298         }
7299         if (rdev->irq.crtc_vblank_int[4] ||
7300             atomic_read(&rdev->irq.pflip[4])) {
7301                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7302                 crtc5 |= VBLANK_INTERRUPT_MASK;
7303         }
7304         if (rdev->irq.crtc_vblank_int[5] ||
7305             atomic_read(&rdev->irq.pflip[5])) {
7306                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7307                 crtc6 |= VBLANK_INTERRUPT_MASK;
7308         }
7309         if (rdev->irq.hpd[0]) {
7310                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7311                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7312         }
7313         if (rdev->irq.hpd[1]) {
7314                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7315                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7316         }
7317         if (rdev->irq.hpd[2]) {
7318                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7319                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7320         }
7321         if (rdev->irq.hpd[3]) {
7322                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7323                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7324         }
7325         if (rdev->irq.hpd[4]) {
7326                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7327                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7328         }
7329         if (rdev->irq.hpd[5]) {
7330                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7331                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7332         }
7333
7334         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7335
7336         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7337         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7338
7339         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7340         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7341         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7342         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7343         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7344         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7345         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7346         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7347
7348         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7349
7350         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7351         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7352         if (rdev->num_crtc >= 4) {
7353                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7354                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7355         }
7356         if (rdev->num_crtc >= 6) {
7357                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7358                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7359         }
7360
7361         if (rdev->num_crtc >= 2) {
7362                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7363                        GRPH_PFLIP_INT_MASK);
7364                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7365                        GRPH_PFLIP_INT_MASK);
7366         }
7367         if (rdev->num_crtc >= 4) {
7368                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7369                        GRPH_PFLIP_INT_MASK);
7370                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7371                        GRPH_PFLIP_INT_MASK);
7372         }
7373         if (rdev->num_crtc >= 6) {
7374                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7375                        GRPH_PFLIP_INT_MASK);
7376                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7377                        GRPH_PFLIP_INT_MASK);
7378         }
7379
7380         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7381         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7382         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7383         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7384         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7385         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7386
7387         /* posting read */
7388         RREG32(SRBM_STATUS);
7389
7390         return 0;
7391 }
7392
7393 /**
7394  * cik_irq_ack - ack interrupt sources
7395  *
7396  * @rdev: radeon_device pointer
7397  *
7398  * Ack interrupt sources on the GPU (vblanks, hpd,
7399  * etc.) (CIK).  Certain interrupts sources are sw
7400  * generated and do not require an explicit ack.
7401  */
7402 static inline void cik_irq_ack(struct radeon_device *rdev)
7403 {
7404         u32 tmp;
7405
7406         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7407         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7408         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7409         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7410         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7411         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7412         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7413
7414         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7415                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7416         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7417                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7418         if (rdev->num_crtc >= 4) {
7419                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7420                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7421                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7422                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7423         }
7424         if (rdev->num_crtc >= 6) {
7425                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7426                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7427                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7428                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7429         }
7430
7431         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7432                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7433                        GRPH_PFLIP_INT_CLEAR);
7434         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7435                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7436                        GRPH_PFLIP_INT_CLEAR);
7437         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7438                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7439         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7440                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7441         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7442                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7443         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7444                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7445
7446         if (rdev->num_crtc >= 4) {
7447                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7448                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7449                                GRPH_PFLIP_INT_CLEAR);
7450                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7451                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7452                                GRPH_PFLIP_INT_CLEAR);
7453                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7454                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7455                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7456                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7457                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7458                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7459                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7460                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7461         }
7462
7463         if (rdev->num_crtc >= 6) {
7464                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7465                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7466                                GRPH_PFLIP_INT_CLEAR);
7467                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7468                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7469                                GRPH_PFLIP_INT_CLEAR);
7470                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7471                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7472                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7473                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7474                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7475                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7476                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7477                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7478         }
7479
7480         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7481                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7482                 tmp |= DC_HPDx_INT_ACK;
7483                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7484         }
7485         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7486                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7487                 tmp |= DC_HPDx_INT_ACK;
7488                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7489         }
7490         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7491                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7492                 tmp |= DC_HPDx_INT_ACK;
7493                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7494         }
7495         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7496                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7497                 tmp |= DC_HPDx_INT_ACK;
7498                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7499         }
7500         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7501                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7502                 tmp |= DC_HPDx_INT_ACK;
7503                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7504         }
7505         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7506                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7507                 tmp |= DC_HPDx_INT_ACK;
7508                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7509         }
7510         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7511                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7512                 tmp |= DC_HPDx_RX_INT_ACK;
7513                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7514         }
7515         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7516                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7517                 tmp |= DC_HPDx_RX_INT_ACK;
7518                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7519         }
7520         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7521                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7522                 tmp |= DC_HPDx_RX_INT_ACK;
7523                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7524         }
7525         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7526                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7527                 tmp |= DC_HPDx_RX_INT_ACK;
7528                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7529         }
7530         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7531                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7532                 tmp |= DC_HPDx_RX_INT_ACK;
7533                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7534         }
7535         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7536                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7537                 tmp |= DC_HPDx_RX_INT_ACK;
7538                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7539         }
7540 }
7541
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).  Turns off the interrupt
 * controller, then acks anything that fired in the meantime so no
 * stale sources remain latched, and finally masks the individual
 * interrupt sources.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7557
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7571
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (via the suspend path, which also
 * stops the RLC) and free the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7586
7587 /**
7588  * cik_get_ih_wptr - get the IH ring buffer wptr
7589  *
7590  * @rdev: radeon_device pointer
7591  *
7592  * Get the IH ring buffer wptr from either the register
7593  * or the writeback memory buffer (CIK).  Also check for
7594  * ring buffer overflow and deal with it.
7595  * Used by cik_irq_process().
7596  * Returns the value of the wptr.
7597  */
7598 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7599 {
7600         u32 wptr, tmp;
7601
7602         if (rdev->wb.enabled)
7603                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7604         else
7605                 wptr = RREG32(IH_RB_WPTR);
7606
7607         if (wptr & RB_OVERFLOW) {
7608                 wptr &= ~RB_OVERFLOW;
7609                 /* When a ring buffer overflow happen start parsing interrupt
7610                  * from the last not overwritten vector (wptr + 16). Hopefully
7611                  * this should allow us to catchup.
7612                  */
7613                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7614                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7615                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7616                 tmp = RREG32(IH_RB_CNTL);
7617                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7618                 WREG32(IH_RB_CNTL, tmp);
7619         }
7620         return (wptr & rdev->ih.ptr_mask);
7621 }
7622
7623 /*        CIK IV Ring
7624  * Each IV ring entry is 128 bits:
7625  * [7:0]    - interrupt source id
7626  * [31:8]   - reserved
7627  * [59:32]  - interrupt source data
7628  * [63:60]  - reserved
7629  * [71:64]  - RINGID
7630  *            CP:
7631  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7632  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7633  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7634  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7635  *            PIPE_ID - ME0 0=3D
7636  *                    - ME1&2 compute dispatcher (4 pipes each)
7637  *            SDMA:
7638  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7639  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7640  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7641  * [79:72]  - VMID
7642  * [95:80]  - PASID
7643  * [127:96] - reserved
7644  */
7645 /**
7646  * cik_irq_process - interrupt handler
7647  *
7648  * @rdev: radeon_device pointer
7649  *
7650  * Interrupt handler (CIK).  Walk the IH ring,
7651  * ack interrupts and schedule work to handle
7652  * interrupt events.
7653  * Returns irq process return code.
7654  */
7655 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7656 {
7657         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7658         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7659         u32 wptr;
7660         u32 rptr;
7661         u32 src_id, src_data, ring_id;
7662         u8 me_id, pipe_id, queue_id;
7663         u32 ring_index;
7664         bool queue_hotplug = false;
7665         bool queue_dp = false;
7666         bool queue_reset = false;
7667         u32 addr, status, mc_client;
7668         bool queue_thermal = false;
7669
7670         if (!rdev->ih.enabled || rdev->shutdown)
7671                 return IRQ_NONE;
7672
7673         wptr = cik_get_ih_wptr(rdev);
7674
7675 restart_ih:
7676         /* is somebody else already processing irqs? */
7677         if (atomic_xchg(&rdev->ih.lock, 1))
7678                 return IRQ_NONE;
7679
7680         rptr = rdev->ih.rptr;
7681         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7682
7683         /* Order reading of wptr vs. reading of IH ring data */
7684         rmb();
7685
7686         /* display interrupts */
7687         cik_irq_ack(rdev);
7688
7689         while (rptr != wptr) {
7690                 /* wptr/rptr are in bytes! */
7691                 ring_index = rptr / 4;
7692                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7693                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7694                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7695
7696                 switch (src_id) {
7697                 case 1: /* D1 vblank/vline */
7698                         switch (src_data) {
7699                         case 0: /* D1 vblank */
7700                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7701                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7702
7703                                 if (rdev->irq.crtc_vblank_int[0]) {
7704                                         drm_handle_vblank(rdev->ddev, 0);
7705                                         rdev->pm.vblank_sync = true;
7706                                         wake_up(&rdev->irq.vblank_queue);
7707                                 }
7708                                 if (atomic_read(&rdev->irq.pflip[0]))
7709                                         radeon_crtc_handle_vblank(rdev, 0);
7710                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7711                                 DRM_DEBUG("IH: D1 vblank\n");
7712
7713                                 break;
7714                         case 1: /* D1 vline */
7715                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7716                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7717
7718                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7719                                 DRM_DEBUG("IH: D1 vline\n");
7720
7721                                 break;
7722                         default:
7723                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7724                                 break;
7725                         }
7726                         break;
7727                 case 2: /* D2 vblank/vline */
7728                         switch (src_data) {
7729                         case 0: /* D2 vblank */
7730                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7731                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7732
7733                                 if (rdev->irq.crtc_vblank_int[1]) {
7734                                         drm_handle_vblank(rdev->ddev, 1);
7735                                         rdev->pm.vblank_sync = true;
7736                                         wake_up(&rdev->irq.vblank_queue);
7737                                 }
7738                                 if (atomic_read(&rdev->irq.pflip[1]))
7739                                         radeon_crtc_handle_vblank(rdev, 1);
7740                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7741                                 DRM_DEBUG("IH: D2 vblank\n");
7742
7743                                 break;
7744                         case 1: /* D2 vline */
7745                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7746                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7747
7748                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7749                                 DRM_DEBUG("IH: D2 vline\n");
7750
7751                                 break;
7752                         default:
7753                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7754                                 break;
7755                         }
7756                         break;
7757                 case 3: /* D3 vblank/vline */
7758                         switch (src_data) {
7759                         case 0: /* D3 vblank */
7760                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7761                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7762
7763                                 if (rdev->irq.crtc_vblank_int[2]) {
7764                                         drm_handle_vblank(rdev->ddev, 2);
7765                                         rdev->pm.vblank_sync = true;
7766                                         wake_up(&rdev->irq.vblank_queue);
7767                                 }
7768                                 if (atomic_read(&rdev->irq.pflip[2]))
7769                                         radeon_crtc_handle_vblank(rdev, 2);
7770                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7771                                 DRM_DEBUG("IH: D3 vblank\n");
7772
7773                                 break;
7774                         case 1: /* D3 vline */
7775                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7776                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7777
7778                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7779                                 DRM_DEBUG("IH: D3 vline\n");
7780
7781                                 break;
7782                         default:
7783                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7784                                 break;
7785                         }
7786                         break;
7787                 case 4: /* D4 vblank/vline */
7788                         switch (src_data) {
7789                         case 0: /* D4 vblank */
7790                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7791                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7792
7793                                 if (rdev->irq.crtc_vblank_int[3]) {
7794                                         drm_handle_vblank(rdev->ddev, 3);
7795                                         rdev->pm.vblank_sync = true;
7796                                         wake_up(&rdev->irq.vblank_queue);
7797                                 }
7798                                 if (atomic_read(&rdev->irq.pflip[3]))
7799                                         radeon_crtc_handle_vblank(rdev, 3);
7800                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7801                                 DRM_DEBUG("IH: D4 vblank\n");
7802
7803                                 break;
7804                         case 1: /* D4 vline */
7805                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7806                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7807
7808                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7809                                 DRM_DEBUG("IH: D4 vline\n");
7810
7811                                 break;
7812                         default:
7813                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7814                                 break;
7815                         }
7816                         break;
7817                 case 5: /* D5 vblank/vline */
7818                         switch (src_data) {
7819                         case 0: /* D5 vblank */
7820                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7821                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7822
7823                                 if (rdev->irq.crtc_vblank_int[4]) {
7824                                         drm_handle_vblank(rdev->ddev, 4);
7825                                         rdev->pm.vblank_sync = true;
7826                                         wake_up(&rdev->irq.vblank_queue);
7827                                 }
7828                                 if (atomic_read(&rdev->irq.pflip[4]))
7829                                         radeon_crtc_handle_vblank(rdev, 4);
7830                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7831                                 DRM_DEBUG("IH: D5 vblank\n");
7832
7833                                 break;
7834                         case 1: /* D5 vline */
7835                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7836                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7837
7838                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7839                                 DRM_DEBUG("IH: D5 vline\n");
7840
7841                                 break;
7842                         default:
7843                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7844                                 break;
7845                         }
7846                         break;
7847                 case 6: /* D6 vblank/vline */
7848                         switch (src_data) {
7849                         case 0: /* D6 vblank */
7850                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7851                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7852
7853                                 if (rdev->irq.crtc_vblank_int[5]) {
7854                                         drm_handle_vblank(rdev->ddev, 5);
7855                                         rdev->pm.vblank_sync = true;
7856                                         wake_up(&rdev->irq.vblank_queue);
7857                                 }
7858                                 if (atomic_read(&rdev->irq.pflip[5]))
7859                                         radeon_crtc_handle_vblank(rdev, 5);
7860                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7861                                 DRM_DEBUG("IH: D6 vblank\n");
7862
7863                                 break;
7864                         case 1: /* D6 vline */
7865                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7866                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7867
7868                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7869                                 DRM_DEBUG("IH: D6 vline\n");
7870
7871                                 break;
7872                         default:
7873                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7874                                 break;
7875                         }
7876                         break;
7877                 case 8: /* D1 page flip */
7878                 case 10: /* D2 page flip */
7879                 case 12: /* D3 page flip */
7880                 case 14: /* D4 page flip */
7881                 case 16: /* D5 page flip */
7882                 case 18: /* D6 page flip */
7883                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7884                         if (radeon_use_pflipirq > 0)
7885                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7886                         break;
7887                 case 42: /* HPD hotplug */
7888                         switch (src_data) {
7889                         case 0:
7890                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7891                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7892
7893                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7894                                 queue_hotplug = true;
7895                                 DRM_DEBUG("IH: HPD1\n");
7896
7897                                 break;
7898                         case 1:
7899                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7900                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7901
7902                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7903                                 queue_hotplug = true;
7904                                 DRM_DEBUG("IH: HPD2\n");
7905
7906                                 break;
7907                         case 2:
7908                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7909                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7910
7911                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7912                                 queue_hotplug = true;
7913                                 DRM_DEBUG("IH: HPD3\n");
7914
7915                                 break;
7916                         case 3:
7917                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7918                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7919
7920                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7921                                 queue_hotplug = true;
7922                                 DRM_DEBUG("IH: HPD4\n");
7923
7924                                 break;
7925                         case 4:
7926                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7927                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7928
7929                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7930                                 queue_hotplug = true;
7931                                 DRM_DEBUG("IH: HPD5\n");
7932
7933                                 break;
7934                         case 5:
7935                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7936                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7937
7938                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7939                                 queue_hotplug = true;
7940                                 DRM_DEBUG("IH: HPD6\n");
7941
7942                                 break;
7943                         case 6:
7944                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7945                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7946
7947                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7948                                 queue_dp = true;
7949                                 DRM_DEBUG("IH: HPD_RX 1\n");
7950
7951                                 break;
7952                         case 7:
7953                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7954                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7955
7956                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7957                                 queue_dp = true;
7958                                 DRM_DEBUG("IH: HPD_RX 2\n");
7959
7960                                 break;
7961                         case 8:
7962                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7963                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7964
7965                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7966                                 queue_dp = true;
7967                                 DRM_DEBUG("IH: HPD_RX 3\n");
7968
7969                                 break;
7970                         case 9:
7971                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7972                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7973
7974                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7975                                 queue_dp = true;
7976                                 DRM_DEBUG("IH: HPD_RX 4\n");
7977
7978                                 break;
7979                         case 10:
7980                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7981                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7982
7983                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7984                                 queue_dp = true;
7985                                 DRM_DEBUG("IH: HPD_RX 5\n");
7986
7987                                 break;
7988                         case 11:
7989                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7990                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7991
7992                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7993                                 queue_dp = true;
7994                                 DRM_DEBUG("IH: HPD_RX 6\n");
7995
7996                                 break;
7997                         default:
7998                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7999                                 break;
8000                         }
8001                         break;
8002                 case 96:
8003                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8004                         WREG32(SRBM_INT_ACK, 0x1);
8005                         break;
8006                 case 124: /* UVD */
8007                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8008                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8009                         break;
8010                 case 146:
8011                 case 147:
8012                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8013                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8014                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8015                         /* reset addr and status */
8016                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8017                         if (addr == 0x0 && status == 0x0)
8018                                 break;
8019                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8020                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8021                                 addr);
8022                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8023                                 status);
8024                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8025                         break;
8026                 case 167: /* VCE */
8027                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8028                         switch (src_data) {
8029                         case 0:
8030                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8031                                 break;
8032                         case 1:
8033                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8034                                 break;
8035                         default:
8036                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8037                                 break;
8038                         }
8039                         break;
8040                 case 176: /* GFX RB CP_INT */
8041                 case 177: /* GFX IB CP_INT */
8042                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8043                         break;
8044                 case 181: /* CP EOP event */
8045                         DRM_DEBUG("IH: CP EOP\n");
8046                         /* XXX check the bitfield order! */
8047                         me_id = (ring_id & 0x60) >> 5;
8048                         pipe_id = (ring_id & 0x18) >> 3;
8049                         queue_id = (ring_id & 0x7) >> 0;
8050                         switch (me_id) {
8051                         case 0:
8052                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8053                                 break;
8054                         case 1:
8055                         case 2:
8056                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8057                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8058                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8059                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8060                                 break;
8061                         }
8062                         break;
8063                 case 184: /* CP Privileged reg access */
8064                         DRM_ERROR("Illegal register access in command stream\n");
8065                         /* XXX check the bitfield order! */
8066                         me_id = (ring_id & 0x60) >> 5;
8067                         pipe_id = (ring_id & 0x18) >> 3;
8068                         queue_id = (ring_id & 0x7) >> 0;
8069                         switch (me_id) {
8070                         case 0:
8071                                 /* This results in a full GPU reset, but all we need to do is soft
8072                                  * reset the CP for gfx
8073                                  */
8074                                 queue_reset = true;
8075                                 break;
8076                         case 1:
8077                                 /* XXX compute */
8078                                 queue_reset = true;
8079                                 break;
8080                         case 2:
8081                                 /* XXX compute */
8082                                 queue_reset = true;
8083                                 break;
8084                         }
8085                         break;
8086                 case 185: /* CP Privileged inst */
8087                         DRM_ERROR("Illegal instruction in command stream\n");
8088                         /* XXX check the bitfield order! */
8089                         me_id = (ring_id & 0x60) >> 5;
8090                         pipe_id = (ring_id & 0x18) >> 3;
8091                         queue_id = (ring_id & 0x7) >> 0;
8092                         switch (me_id) {
8093                         case 0:
8094                                 /* This results in a full GPU reset, but all we need to do is soft
8095                                  * reset the CP for gfx
8096                                  */
8097                                 queue_reset = true;
8098                                 break;
8099                         case 1:
8100                                 /* XXX compute */
8101                                 queue_reset = true;
8102                                 break;
8103                         case 2:
8104                                 /* XXX compute */
8105                                 queue_reset = true;
8106                                 break;
8107                         }
8108                         break;
8109                 case 224: /* SDMA trap event */
8110                         /* XXX check the bitfield order! */
8111                         me_id = (ring_id & 0x3) >> 0;
8112                         queue_id = (ring_id & 0xc) >> 2;
8113                         DRM_DEBUG("IH: SDMA trap\n");
8114                         switch (me_id) {
8115                         case 0:
8116                                 switch (queue_id) {
8117                                 case 0:
8118                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8119                                         break;
8120                                 case 1:
8121                                         /* XXX compute */
8122                                         break;
8123                                 case 2:
8124                                         /* XXX compute */
8125                                         break;
8126                                 }
8127                                 break;
8128                         case 1:
8129                                 switch (queue_id) {
8130                                 case 0:
8131                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8132                                         break;
8133                                 case 1:
8134                                         /* XXX compute */
8135                                         break;
8136                                 case 2:
8137                                         /* XXX compute */
8138                                         break;
8139                                 }
8140                                 break;
8141                         }
8142                         break;
8143                 case 230: /* thermal low to high */
8144                         DRM_DEBUG("IH: thermal low to high\n");
8145                         rdev->pm.dpm.thermal.high_to_low = false;
8146                         queue_thermal = true;
8147                         break;
8148                 case 231: /* thermal high to low */
8149                         DRM_DEBUG("IH: thermal high to low\n");
8150                         rdev->pm.dpm.thermal.high_to_low = true;
8151                         queue_thermal = true;
8152                         break;
8153                 case 233: /* GUI IDLE */
8154                         DRM_DEBUG("IH: GUI idle\n");
8155                         break;
8156                 case 241: /* SDMA Privileged inst */
8157                 case 247: /* SDMA Privileged inst */
8158                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8159                         /* XXX check the bitfield order! */
8160                         me_id = (ring_id & 0x3) >> 0;
8161                         queue_id = (ring_id & 0xc) >> 2;
8162                         switch (me_id) {
8163                         case 0:
8164                                 switch (queue_id) {
8165                                 case 0:
8166                                         queue_reset = true;
8167                                         break;
8168                                 case 1:
8169                                         /* XXX compute */
8170                                         queue_reset = true;
8171                                         break;
8172                                 case 2:
8173                                         /* XXX compute */
8174                                         queue_reset = true;
8175                                         break;
8176                                 }
8177                                 break;
8178                         case 1:
8179                                 switch (queue_id) {
8180                                 case 0:
8181                                         queue_reset = true;
8182                                         break;
8183                                 case 1:
8184                                         /* XXX compute */
8185                                         queue_reset = true;
8186                                         break;
8187                                 case 2:
8188                                         /* XXX compute */
8189                                         queue_reset = true;
8190                                         break;
8191                                 }
8192                                 break;
8193                         }
8194                         break;
8195                 default:
8196                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8197                         break;
8198                 }
8199
8200                 /* wptr/rptr are in bytes! */
8201                 rptr += 16;
8202                 rptr &= rdev->ih.ptr_mask;
8203                 WREG32(IH_RB_RPTR, rptr);
8204         }
8205         if (queue_dp)
8206                 schedule_work(&rdev->dp_work);
8207         if (queue_hotplug)
8208                 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
8209         if (queue_reset) {
8210                 rdev->needs_reset = true;
8211                 wake_up_all(&rdev->fence_queue);
8212         }
8213         if (queue_thermal)
8214                 taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
8215         rdev->ih.rptr = rptr;
8216         atomic_set(&rdev->ih.lock, 0);
8217
8218         /* make sure wptr hasn't changed while processing */
8219         wptr = cik_get_ih_wptr(rdev);
8220         if (wptr != rptr)
8221                 goto restart_ih;
8222
8223         return IRQ_HANDLED;
8224 }
8225
8226 /*
8227  * startup/shutdown callbacks
8228  */
8229 static void cik_uvd_init(struct radeon_device *rdev)
8230 {
8231         int r;
8232
8233         if (!rdev->has_uvd)
8234                 return;
8235
8236         r = radeon_uvd_init(rdev);
8237         if (r) {
8238                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8239                 /*
8240                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8241                  * to early fails cik_uvd_start() and thus nothing happens
8242                  * there. So it is pointless to try to go through that code
8243                  * hence why we disable uvd here.
8244                  */
8245                 rdev->has_uvd = 0;
8246                 return;
8247         }
8248         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8249         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8250 }
8251
8252 static void cik_uvd_start(struct radeon_device *rdev)
8253 {
8254         int r;
8255
8256         if (!rdev->has_uvd)
8257                 return;
8258
8259         r = radeon_uvd_resume(rdev);
8260         if (r) {
8261                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8262                 goto error;
8263         }
8264         r = uvd_v4_2_resume(rdev);
8265         if (r) {
8266                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8267                 goto error;
8268         }
8269         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8270         if (r) {
8271                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8272                 goto error;
8273         }
8274         return;
8275
8276 error:
8277         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8278 }
8279
8280 static void cik_uvd_resume(struct radeon_device *rdev)
8281 {
8282         struct radeon_ring *ring;
8283         int r;
8284
8285         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8286                 return;
8287
8288         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8289         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
8290         if (r) {
8291                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8292                 return;
8293         }
8294         r = uvd_v1_0_init(rdev);
8295         if (r) {
8296                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8297                 return;
8298         }
8299 }
8300
8301 static void cik_vce_init(struct radeon_device *rdev)
8302 {
8303         int r;
8304
8305         if (!rdev->has_vce)
8306                 return;
8307
8308         r = radeon_vce_init(rdev);
8309         if (r) {
8310                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8311                 /*
8312                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8313                  * to early fails cik_vce_start() and thus nothing happens
8314                  * there. So it is pointless to try to go through that code
8315                  * hence why we disable vce here.
8316                  */
8317                 rdev->has_vce = 0;
8318                 return;
8319         }
8320         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8321         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8322         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8323         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8324 }
8325
8326 static void cik_vce_start(struct radeon_device *rdev)
8327 {
8328         int r;
8329
8330         if (!rdev->has_vce)
8331                 return;
8332
8333         r = radeon_vce_resume(rdev);
8334         if (r) {
8335                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8336                 goto error;
8337         }
8338         r = vce_v2_0_resume(rdev);
8339         if (r) {
8340                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8341                 goto error;
8342         }
8343         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8344         if (r) {
8345                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8346                 goto error;
8347         }
8348         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8349         if (r) {
8350                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8351                 goto error;
8352         }
8353         return;
8354
8355 error:
8356         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8357         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8358 }
8359
8360 static void cik_vce_resume(struct radeon_device *rdev)
8361 {
8362         struct radeon_ring *ring;
8363         int r;
8364
8365         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8366                 return;
8367
8368         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8369         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8370         if (r) {
8371                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8372                 return;
8373         }
8374         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8375         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8376         if (r) {
8377                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8378                 return;
8379         }
8380         r = vce_v1_0_init(rdev);
8381         if (r) {
8382                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8383                 return;
8384         }
8385 }
8386
8387 /**
8388  * cik_startup - program the asic to a functional state
8389  *
8390  * @rdev: radeon_device pointer
8391  *
8392  * Programs the asic to a functional state (CIK).
8393  * Called by cik_init() and cik_resume().
8394  * Returns 0 for success, error for failure.
8395  */
8396 static int cik_startup(struct radeon_device *rdev)
8397 {
8398         struct radeon_ring *ring;
8399         u32 nop;
8400         int r;
8401
8402         /* enable pcie gen2/3 link */
8403         cik_pcie_gen3_enable(rdev);
8404         /* enable aspm */
8405         cik_program_aspm(rdev);
8406
8407         /* scratch needs to be initialized before MC */
8408         r = r600_vram_scratch_init(rdev);
8409         if (r)
8410                 return r;
8411
8412         cik_mc_program(rdev);
8413
8414         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8415                 r = ci_mc_load_microcode(rdev);
8416                 if (r) {
8417                         DRM_ERROR("Failed to load MC firmware!\n");
8418                         return r;
8419                 }
8420         }
8421
8422         r = cik_pcie_gart_enable(rdev);
8423         if (r)
8424                 return r;
8425         cik_gpu_init(rdev);
8426
8427         /* allocate rlc buffers */
8428         if (rdev->flags & RADEON_IS_IGP) {
8429                 if (rdev->family == CHIP_KAVERI) {
8430                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8431                         rdev->rlc.reg_list_size =
8432                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8433                 } else {
8434                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8435                         rdev->rlc.reg_list_size =
8436                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8437                 }
8438         }
8439         rdev->rlc.cs_data = ci_cs_data;
8440         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8441         r = sumo_rlc_init(rdev);
8442         if (r) {
8443                 DRM_ERROR("Failed to init rlc BOs!\n");
8444                 return r;
8445         }
8446
8447         /* allocate wb buffer */
8448         r = radeon_wb_init(rdev);
8449         if (r)
8450                 return r;
8451
8452         /* allocate mec buffers */
8453         r = cik_mec_init(rdev);
8454         if (r) {
8455                 DRM_ERROR("Failed to init MEC BOs!\n");
8456                 return r;
8457         }
8458
8459         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8460         if (r) {
8461                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8462                 return r;
8463         }
8464
8465         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8466         if (r) {
8467                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8468                 return r;
8469         }
8470
8471         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8472         if (r) {
8473                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8474                 return r;
8475         }
8476
8477         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8478         if (r) {
8479                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8480                 return r;
8481         }
8482
8483         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8484         if (r) {
8485                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8486                 return r;
8487         }
8488
8489         cik_uvd_start(rdev);
8490         cik_vce_start(rdev);
8491
8492         /* Enable IRQ */
8493         if (!rdev->irq.installed) {
8494                 r = radeon_irq_kms_init(rdev);
8495                 if (r)
8496                         return r;
8497         }
8498
8499         r = cik_irq_init(rdev);
8500         if (r) {
8501                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8502                 radeon_irq_kms_fini(rdev);
8503                 return r;
8504         }
8505         cik_irq_set(rdev);
8506
8507         if (rdev->family == CHIP_HAWAII) {
8508                 if (rdev->new_fw)
8509                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8510                 else
8511                         nop = RADEON_CP_PACKET2;
8512         } else {
8513                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8514         }
8515
8516         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8517         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8518                              nop);
8519         if (r)
8520                 return r;
8521
8522         /* set up the compute queues */
8523         /* type-2 packets are deprecated on MEC, use type-3 instead */
8524         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8525         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8526                              nop);
8527         if (r)
8528                 return r;
8529         ring->me = 1; /* first MEC */
8530         ring->pipe = 0; /* first pipe */
8531         ring->queue = 0; /* first queue */
8532         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8533
8534         /* type-2 packets are deprecated on MEC, use type-3 instead */
8535         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8536         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8537                              nop);
8538         if (r)
8539                 return r;
8540         /* dGPU only have 1 MEC */
8541         ring->me = 1; /* first MEC */
8542         ring->pipe = 0; /* first pipe */
8543         ring->queue = 1; /* second queue */
8544         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8545
8546         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8547         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8548                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8549         if (r)
8550                 return r;
8551
8552         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8553         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8554                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8555         if (r)
8556                 return r;
8557
8558         r = cik_cp_resume(rdev);
8559         if (r)
8560                 return r;
8561
8562         r = cik_sdma_resume(rdev);
8563         if (r)
8564                 return r;
8565
8566         cik_uvd_resume(rdev);
8567         cik_vce_resume(rdev);
8568
8569         r = radeon_ib_pool_init(rdev);
8570         if (r) {
8571                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8572                 return r;
8573         }
8574
8575         r = radeon_vm_manager_init(rdev);
8576         if (r) {
8577                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8578                 return r;
8579         }
8580
8581         r = radeon_audio_init(rdev);
8582         if (r)
8583                 return r;
8584
8585         return 0;
8586 }
8587
8588 /**
8589  * cik_resume - resume the asic to a functional state
8590  *
8591  * @rdev: radeon_device pointer
8592  *
8593  * Programs the asic to a functional state (CIK).
8594  * Called at resume.
8595  * Returns 0 for success, error for failure.
8596  */
8597 int cik_resume(struct radeon_device *rdev)
8598 {
8599         int r;
8600
8601         /* post card */
8602         atom_asic_init(rdev->mode_info.atom_context);
8603
8604         /* init golden registers */
8605         cik_init_golden_registers(rdev);
8606
8607         if (rdev->pm.pm_method == PM_METHOD_DPM)
8608                 radeon_pm_resume(rdev);
8609
8610         rdev->accel_working = true;
8611         r = cik_startup(rdev);
8612         if (r) {
8613                 DRM_ERROR("cik startup failed on resume\n");
8614                 rdev->accel_working = false;
8615                 return r;
8616         }
8617
8618         return r;
8619
8620 }
8621
8622 /**
8623  * cik_suspend - suspend the asic
8624  *
8625  * @rdev: radeon_device pointer
8626  *
8627  * Bring the chip into a state suitable for suspend (CIK).
8628  * Called at suspend.
8629  * Returns 0 for success.
8630  */
8631 int cik_suspend(struct radeon_device *rdev)
8632 {
8633         radeon_pm_suspend(rdev);
8634         radeon_audio_fini(rdev);
8635         radeon_vm_manager_fini(rdev);
8636         cik_cp_enable(rdev, false);
8637         cik_sdma_enable(rdev, false);
8638         if (rdev->has_uvd) {
8639         uvd_v1_0_fini(rdev);
8640         radeon_uvd_suspend(rdev);
8641         }
8642         if (rdev->has_vce)
8643         radeon_vce_suspend(rdev);
8644         cik_fini_pg(rdev);
8645         cik_fini_cg(rdev);
8646         cik_irq_suspend(rdev);
8647         radeon_wb_disable(rdev);
8648         cik_pcie_gart_disable(rdev);
8649         return 0;
8650 }
8651
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK): BIOS/ATOM setup, MC and BO manager,
 * microcode load, ring bookkeeping, then cik_startup().
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" — looks like a
		 * copy/paste leftover from ni.c; this file handles CIK.
		 */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode if any image is missing; IGPs have no MC
	 * firmware, dGPUs additionally require mc_fw
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* register the five CP/SDMA rings; the two compute rings also
	 * need a doorbell
	 */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* non-fatal on failure: the helpers clear has_uvd/has_vce */
	cik_uvd_init(rdev);
	cik_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* note: startup failure is not returned; the device stays
		 * up with acceleration disabled
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 * NOTE(review): the "NI+" text below is a leftover from ni.c.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8807
8808 /**
8809  * cik_fini - asic specific driver and hw fini
8810  *
8811  * @rdev: radeon_device pointer
8812  *
8813  * Tear down the asic specific driver variables and program the hw
8814  * to an idle state (CIK).
8815  * Called at driver unload.
8816  */
void cik_fini(struct radeon_device *rdev)
{
	/* stop power management first so no DPM work runs during teardown */
	radeon_pm_fini(rdev);
	/* halt the command processors and DMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable powergating and clockgating while the blocks still exist */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* tear down UVD/VCE before the GART they may reference goes away */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	cik_fini_microcode(rdev);
	/* bios copy was allocated at init time; clear the stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8844
8845 void dce8_program_fmt(struct drm_encoder *encoder)
8846 {
8847         struct drm_device *dev = encoder->dev;
8848         struct radeon_device *rdev = dev->dev_private;
8849         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8850         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8851         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8852         int bpc = 0;
8853         u32 tmp = 0;
8854         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8855
8856         if (connector) {
8857                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8858                 bpc = radeon_get_monitor_bpc(connector);
8859                 dither = radeon_connector->dither;
8860         }
8861
8862         /* LVDS/eDP FMT is set up by atom */
8863         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8864                 return;
8865
8866         /* not needed for analog */
8867         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8868             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8869                 return;
8870
8871         if (bpc == 0)
8872                 return;
8873
8874         switch (bpc) {
8875         case 6:
8876                 if (dither == RADEON_FMT_DITHER_ENABLE)
8877                         /* XXX sort out optimal dither settings */
8878                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8879                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8880                 else
8881                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8882                 break;
8883         case 8:
8884                 if (dither == RADEON_FMT_DITHER_ENABLE)
8885                         /* XXX sort out optimal dither settings */
8886                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8887                                 FMT_RGB_RANDOM_ENABLE |
8888                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8889                 else
8890                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8891                 break;
8892         case 10:
8893                 if (dither == RADEON_FMT_DITHER_ENABLE)
8894                         /* XXX sort out optimal dither settings */
8895                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8896                                 FMT_RGB_RANDOM_ENABLE |
8897                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8898                 else
8899                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8900                 break;
8901         default:
8902                 /* not needed */
8903                 break;
8904         }
8905
8906         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8907 }
8908
8909 /* display watermark setup */
8910 /**
8911  * dce8_line_buffer_adjust - Set up the line buffer
8912  *
8913  * @rdev: radeon_device pointer
8914  * @radeon_crtc: the selected display controller
8915  * @mode: the current display mode on the selected display
8916  * controller
8917  *
8918  * Setup up the line buffer allocation for
8919  * the selected display controller (CIK).
8920  * Returns the line buffer size in pixels.
8921  */
8922 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8923                                    struct radeon_crtc *radeon_crtc,
8924                                    struct drm_display_mode *mode)
8925 {
8926         u32 tmp, buffer_alloc, i;
8927         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8928         /*
8929          * Line Buffer Setup
8930          * There are 6 line buffers, one for each display controllers.
8931          * There are 3 partitions per LB. Select the number of partitions
8932          * to enable based on the display width.  For display widths larger
8933          * than 4096, you need use to use 2 display controllers and combine
8934          * them using the stereo blender.
8935          */
8936         if (radeon_crtc->base.enabled && mode) {
8937                 if (mode->crtc_hdisplay < 1920) {
8938                         tmp = 1;
8939                         buffer_alloc = 2;
8940                 } else if (mode->crtc_hdisplay < 2560) {
8941                         tmp = 2;
8942                         buffer_alloc = 2;
8943                 } else if (mode->crtc_hdisplay < 4096) {
8944                         tmp = 0;
8945                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8946                 } else {
8947                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8948                         tmp = 0;
8949                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8950                 }
8951         } else {
8952                 tmp = 1;
8953                 buffer_alloc = 0;
8954         }
8955
8956         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8957                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8958
8959         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8960                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8961         for (i = 0; i < rdev->usec_timeout; i++) {
8962                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8963                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8964                         break;
8965                 udelay(1);
8966         }
8967
8968         if (radeon_crtc->base.enabled && mode) {
8969                 switch (tmp) {
8970                 case 0:
8971                 default:
8972                         return 4096 * 2;
8973                 case 1:
8974                         return 1920 * 2;
8975                 case 2:
8976                         return 2560 * 2;
8977                 }
8978         }
8979
8980         /* controller not enabled, so no lb used */
8981         return 0;
8982 }
8983
8984 /**
8985  * cik_get_number_of_dram_channels - get the number of dram channels
8986  *
8987  * @rdev: radeon_device pointer
8988  *
8989  * Look up the number of video ram channels (CIK).
8990  * Used for display watermark bandwidth calculations
8991  * Returns the number of dram channels
8992  */
8993 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8994 {
8995         u32 tmp = RREG32(MC_SHARED_CHMAP);
8996
8997         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8998         case 0:
8999         default:
9000                 return 1;
9001         case 1:
9002                 return 2;
9003         case 2:
9004                 return 4;
9005         case 3:
9006                 return 8;
9007         case 4:
9008                 return 3;
9009         case 5:
9010                 return 6;
9011         case 6:
9012                 return 10;
9013         case 7:
9014                 return 12;
9015         case 8:
9016                 return 16;
9017         }
9018 }
9019
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9035
9036 /**
9037  * dce8_dram_bandwidth - get the dram bandwidth
9038  *
9039  * @wm: watermark calculation data
9040  *
9041  * Calculate the raw dram bandwidth (CIK).
9042  * Used for display watermark bandwidth calculations
9043  * Returns the dram bandwidth in MBytes/s
9044  */
9045 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9046 {
9047         /* Calculate raw DRAM Bandwidth */
9048         fixed20_12 dram_efficiency; /* 0.7 */
9049         fixed20_12 yclk, dram_channels, bandwidth;
9050         fixed20_12 a;
9051
9052         a.full = dfixed_const(1000);
9053         yclk.full = dfixed_const(wm->yclk);
9054         yclk.full = dfixed_div(yclk, a);
9055         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9056         a.full = dfixed_const(10);
9057         dram_efficiency.full = dfixed_const(7);
9058         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9059         bandwidth.full = dfixed_mul(dram_channels, yclk);
9060         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9061
9062         return dfixed_trunc(bandwidth);
9063 }
9064
9065 /**
9066  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9067  *
9068  * @wm: watermark calculation data
9069  *
9070  * Calculate the dram bandwidth used for display (CIK).
9071  * Used for display watermark bandwidth calculations
9072  * Returns the dram bandwidth for display in MBytes/s
9073  */
9074 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9075 {
9076         /* Calculate DRAM Bandwidth and the part allocated to display. */
9077         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9078         fixed20_12 yclk, dram_channels, bandwidth;
9079         fixed20_12 a;
9080
9081         a.full = dfixed_const(1000);
9082         yclk.full = dfixed_const(wm->yclk);
9083         yclk.full = dfixed_div(yclk, a);
9084         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9085         a.full = dfixed_const(10);
9086         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9087         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9088         bandwidth.full = dfixed_mul(dram_channels, yclk);
9089         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9090
9091         return dfixed_trunc(bandwidth);
9092 }
9093
9094 /**
9095  * dce8_data_return_bandwidth - get the data return bandwidth
9096  *
9097  * @wm: watermark calculation data
9098  *
9099  * Calculate the data return bandwidth used for display (CIK).
9100  * Used for display watermark bandwidth calculations
9101  * Returns the data return bandwidth in MBytes/s
9102  */
9103 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9104 {
9105         /* Calculate the display Data return Bandwidth */
9106         fixed20_12 return_efficiency; /* 0.8 */
9107         fixed20_12 sclk, bandwidth;
9108         fixed20_12 a;
9109
9110         a.full = dfixed_const(1000);
9111         sclk.full = dfixed_const(wm->sclk);
9112         sclk.full = dfixed_div(sclk, a);
9113         a.full = dfixed_const(10);
9114         return_efficiency.full = dfixed_const(8);
9115         return_efficiency.full = dfixed_div(return_efficiency, a);
9116         a.full = dfixed_const(32);
9117         bandwidth.full = dfixed_mul(a, sclk);
9118         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9119
9120         return dfixed_trunc(bandwidth);
9121 }
9122
9123 /**
9124  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9125  *
9126  * @wm: watermark calculation data
9127  *
9128  * Calculate the dmif bandwidth used for display (CIK).
9129  * Used for display watermark bandwidth calculations
9130  * Returns the dmif bandwidth in MBytes/s
9131  */
9132 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9133 {
9134         /* Calculate the DMIF Request Bandwidth */
9135         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9136         fixed20_12 disp_clk, bandwidth;
9137         fixed20_12 a, b;
9138
9139         a.full = dfixed_const(1000);
9140         disp_clk.full = dfixed_const(wm->disp_clk);
9141         disp_clk.full = dfixed_div(disp_clk, a);
9142         a.full = dfixed_const(32);
9143         b.full = dfixed_mul(a, disp_clk);
9144
9145         a.full = dfixed_const(10);
9146         disp_clk_request_efficiency.full = dfixed_const(8);
9147         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9148
9149         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9150
9151         return dfixed_trunc(bandwidth);
9152 }
9153
9154 /**
9155  * dce8_available_bandwidth - get the min available bandwidth
9156  *
9157  * @wm: watermark calculation data
9158  *
9159  * Calculate the min available bandwidth used for display (CIK).
9160  * Used for display watermark bandwidth calculations
9161  * Returns the min available bandwidth in MBytes/s
9162  */
9163 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9164 {
9165         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9166         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9167         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9168         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9169
9170         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9171 }
9172
9173 /**
9174  * dce8_average_bandwidth - get the average available bandwidth
9175  *
9176  * @wm: watermark calculation data
9177  *
9178  * Calculate the average available bandwidth used for display (CIK).
9179  * Used for display watermark bandwidth calculations
9180  * Returns the average available bandwidth in MBytes/s
9181  */
9182 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9183 {
9184         /* Calculate the display mode Average Bandwidth
9185          * DisplayMode should contain the source and destination dimensions,
9186          * timing, etc.
9187          */
9188         fixed20_12 bpp;
9189         fixed20_12 line_time;
9190         fixed20_12 src_width;
9191         fixed20_12 bandwidth;
9192         fixed20_12 a;
9193
9194         a.full = dfixed_const(1000);
9195         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9196         line_time.full = dfixed_div(line_time, a);
9197         bpp.full = dfixed_const(wm->bytes_per_pixel);
9198         src_width.full = dfixed_const(wm->src_width);
9199         bandwidth.full = dfixed_mul(src_width, bpp);
9200         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9201         bandwidth.full = dfixed_div(bandwidth, line_time);
9202
9203         return dfixed_trunc(bandwidth);
9204 }
9205
9206 /**
9207  * dce8_latency_watermark - get the latency watermark
9208  *
9209  * @wm: watermark calculation data
9210  *
9211  * Calculate the latency watermark (CIK).
9212  * Used for display watermark bandwidth calculations
9213  * Returns the latency watermark in ns
9214  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time for the worst-case 512-byte*8 chunk to return, in ns */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* data return time consumed by the other active heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> no latency to hide */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps, or interlace with 2x scale
	 * need up to 4 source lines per destination line; otherwise 2 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's even share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* also cap the fill rate at disp_clk/1000 * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch one destination line's worth of source pixels */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line cannot be filled within the active period, the
	 * shortfall is added to the latency the watermark must cover */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9277
9278 /**
9279  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9280  * average and available dram bandwidth
9281  *
9282  * @wm: watermark calculation data
9283  *
9284  * Check if the display average bandwidth fits in the display
9285  * dram bandwidth (CIK).
9286  * Used for display watermark bandwidth calculations
9287  * Returns true if the display fits, false if not.
9288  */
9289 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9290 {
9291         if (dce8_average_bandwidth(wm) <=
9292             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9293                 return true;
9294         else
9295                 return false;
9296 }
9297
9298 /**
9299  * dce8_average_bandwidth_vs_available_bandwidth - check
9300  * average and available bandwidth
9301  *
9302  * @wm: watermark calculation data
9303  *
9304  * Check if the display average bandwidth fits in the display
9305  * available bandwidth (CIK).
9306  * Used for display watermark bandwidth calculations
9307  * Returns true if the display fits, false if not.
9308  */
9309 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9310 {
9311         if (dce8_average_bandwidth(wm) <=
9312             (dce8_available_bandwidth(wm) / wm->num_heads))
9313                 return true;
9314         else
9315                 return false;
9316 }
9317
9318 /**
9319  * dce8_check_latency_hiding - check latency hiding
9320  *
9321  * @wm: watermark calculation data
9322  *
9323  * Check latency hiding (CIK).
9324  * Used for display watermark bandwidth calculations
9325  * Returns true if the display fits, false if not.
9326  */
9327 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9328 {
9329         u32 lb_partitions = wm->lb_size / wm->src_width;
9330         u32 line_time = wm->active_time + wm->blank_time;
9331         u32 latency_tolerant_lines;
9332         u32 latency_hiding;
9333         fixed20_12 a;
9334
9335         a.full = dfixed_const(1);
9336         if (wm->vsc.full > a.full)
9337                 latency_tolerant_lines = 1;
9338         else {
9339                 if (lb_partitions <= (wm->vtaps + 1))
9340                         latency_tolerant_lines = 1;
9341                 else
9342                         latency_tolerant_lines = 2;
9343         }
9344
9345         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9346
9347         if (dce8_latency_watermark(wm) <= latency_hiding)
9348                 return true;
9349         else
9350                 return false;
9351 }
9352
9353 /**
9354  * dce8_program_watermarks - program display watermarks
9355  *
9356  * @rdev: radeon_device pointer
9357  * @radeon_crtc: the selected display controller
9358  * @lb_size: line buffer size
9359  * @num_heads: number of display controllers in use
9360  *
9361  * Calculate and program the display watermarks for the
9362  * selected display controller (CIK).
9363  */
9364 static void dce8_program_watermarks(struct radeon_device *rdev,
9365                                     struct radeon_crtc *radeon_crtc,
9366                                     u32 lb_size, u32 num_heads)
9367 {
9368         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9369         struct dce8_wm_params wm_low, wm_high;
9370         u32 pixel_period;
9371         u32 line_time = 0;
9372         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9373         u32 tmp, wm_mask;
9374
9375         if (radeon_crtc->base.enabled && num_heads && mode) {
9376                 pixel_period = 1000000 / (u32)mode->clock;
9377                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9378
9379                 /* watermark for high clocks */
9380                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9381                     rdev->pm.dpm_enabled) {
9382                         wm_high.yclk =
9383                                 radeon_dpm_get_mclk(rdev, false) * 10;
9384                         wm_high.sclk =
9385                                 radeon_dpm_get_sclk(rdev, false) * 10;
9386                 } else {
9387                         wm_high.yclk = rdev->pm.current_mclk * 10;
9388                         wm_high.sclk = rdev->pm.current_sclk * 10;
9389                 }
9390
9391                 wm_high.disp_clk = mode->clock;
9392                 wm_high.src_width = mode->crtc_hdisplay;
9393                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9394                 wm_high.blank_time = line_time - wm_high.active_time;
9395                 wm_high.interlaced = false;
9396                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9397                         wm_high.interlaced = true;
9398                 wm_high.vsc = radeon_crtc->vsc;
9399                 wm_high.vtaps = 1;
9400                 if (radeon_crtc->rmx_type != RMX_OFF)
9401                         wm_high.vtaps = 2;
9402                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9403                 wm_high.lb_size = lb_size;
9404                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9405                 wm_high.num_heads = num_heads;
9406
9407                 /* set for high clocks */
9408                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9409
9410                 /* possibly force display priority to high */
9411                 /* should really do this at mode validation time... */
9412                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9413                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9414                     !dce8_check_latency_hiding(&wm_high) ||
9415                     (rdev->disp_priority == 2)) {
9416                         DRM_DEBUG_KMS("force priority to high\n");
9417                 }
9418
9419                 /* watermark for low clocks */
9420                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9421                     rdev->pm.dpm_enabled) {
9422                         wm_low.yclk =
9423                                 radeon_dpm_get_mclk(rdev, true) * 10;
9424                         wm_low.sclk =
9425                                 radeon_dpm_get_sclk(rdev, true) * 10;
9426                 } else {
9427                         wm_low.yclk = rdev->pm.current_mclk * 10;
9428                         wm_low.sclk = rdev->pm.current_sclk * 10;
9429                 }
9430
9431                 wm_low.disp_clk = mode->clock;
9432                 wm_low.src_width = mode->crtc_hdisplay;
9433                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9434                 wm_low.blank_time = line_time - wm_low.active_time;
9435                 wm_low.interlaced = false;
9436                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9437                         wm_low.interlaced = true;
9438                 wm_low.vsc = radeon_crtc->vsc;
9439                 wm_low.vtaps = 1;
9440                 if (radeon_crtc->rmx_type != RMX_OFF)
9441                         wm_low.vtaps = 2;
9442                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9443                 wm_low.lb_size = lb_size;
9444                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9445                 wm_low.num_heads = num_heads;
9446
9447                 /* set for low clocks */
9448                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9449
9450                 /* possibly force display priority to high */
9451                 /* should really do this at mode validation time... */
9452                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9453                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9454                     !dce8_check_latency_hiding(&wm_low) ||
9455                     (rdev->disp_priority == 2)) {
9456                         DRM_DEBUG_KMS("force priority to high\n");
9457                 }
9458
9459                 /* Save number of lines the linebuffer leads before the scanout */
9460                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9461         }
9462
9463         /* select wm A */
9464         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9465         tmp = wm_mask;
9466         tmp &= ~LATENCY_WATERMARK_MASK(3);
9467         tmp |= LATENCY_WATERMARK_MASK(1);
9468         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9469         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9470                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9471                 LATENCY_HIGH_WATERMARK(line_time)));
9472         /* select wm B */
9473         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9474         tmp &= ~LATENCY_WATERMARK_MASK(3);
9475         tmp |= LATENCY_WATERMARK_MASK(2);
9476         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9477         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9478                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9479                 LATENCY_HIGH_WATERMARK(line_time)));
9480         /* restore original selection */
9481         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9482
9483         /* save values for DPM */
9484         radeon_crtc->line_time = line_time;
9485         radeon_crtc->wm_high = latency_watermark_a;
9486         radeon_crtc->wm_low = latency_watermark_b;
9487 }
9488
9489 /**
9490  * dce8_bandwidth_update - program display watermarks
9491  *
9492  * @rdev: radeon_device pointer
9493  *
9494  * Calculate and program the display watermarks and line
9495  * buffer allocation (CIK).
9496  */
9497 void dce8_bandwidth_update(struct radeon_device *rdev)
9498 {
9499         struct drm_display_mode *mode = NULL;
9500         u32 num_heads = 0, lb_size;
9501         int i;
9502
9503         if (!rdev->mode_info.mode_config_initialized)
9504                 return;
9505
9506         radeon_update_display_priority(rdev);
9507
9508         for (i = 0; i < rdev->num_crtc; i++) {
9509                 if (rdev->mode_info.crtcs[i]->base.enabled)
9510                         num_heads++;
9511         }
9512         for (i = 0; i < rdev->num_crtc; i++) {
9513                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9514                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9515                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9516         }
9517 }
9518
9519 /**
9520  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9521  *
9522  * @rdev: radeon_device pointer
9523  *
9524  * Fetches a GPU clock counter snapshot (SI).
9525  * Returns the 64 bit clock counter snapshot.
9526  */
9527 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9528 {
9529         uint64_t clock;
9530
9531         mutex_lock(&rdev->gpu_clock_mutex);
9532         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9533         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9534                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9535         mutex_unlock(&rdev->gpu_clock_mutex);
9536         return clock;
9537 }
9538
9539 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9540                               u32 cntl_reg, u32 status_reg)
9541 {
9542         int r, i;
9543         struct atom_clock_dividers dividers;
9544         uint32_t tmp;
9545
9546         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9547                                            clock, false, &dividers);
9548         if (r)
9549                 return r;
9550
9551         tmp = RREG32_SMC(cntl_reg);
9552         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9553         tmp |= dividers.post_divider;
9554         WREG32_SMC(cntl_reg, tmp);
9555
9556         for (i = 0; i < 100; i++) {
9557                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9558                         break;
9559                 mdelay(10);
9560         }
9561         if (i == 100)
9562                 return -ETIMEDOUT;
9563
9564         return 0;
9565 }
9566
9567 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9568 {
9569         int r = 0;
9570
9571         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9572         if (r)
9573                 return r;
9574
9575         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9576         return r;
9577 }
9578
9579 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9580 {
9581         int r, i;
9582         struct atom_clock_dividers dividers;
9583         u32 tmp;
9584
9585         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9586                                            ecclk, false, &dividers);
9587         if (r)
9588                 return r;
9589
9590         for (i = 0; i < 100; i++) {
9591                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9592                         break;
9593                 mdelay(10);
9594         }
9595         if (i == 100)
9596                 return -ETIMEDOUT;
9597
9598         tmp = RREG32_SMC(CG_ECLK_CNTL);
9599         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9600         tmp |= dividers.post_divider;
9601         WREG32_SMC(CG_ECLK_CNTL, tmp);
9602
9603         for (i = 0; i < 100; i++) {
9604                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9605                         break;
9606                 mdelay(10);
9607         }
9608         if (i == 100)
9609                 return -ETIMEDOUT;
9610
9611         return 0;
9612 }
9613
/**
 * cik_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks what the platform and bridge support, optionally re-runs gen3
 * equalization, programs the target link speed in the GPU's Link
 * Control 2 register, and kicks off a hardware speed change via
 * PCIE_LC_SPEED_CNTL.  Silently returns on any precondition failure.
 * Disabled entirely with radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* honor the radeon.pcie_gen2 module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIe link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if the platform caps out at gen1 */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current link rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* DragonFly: locate the PCIe capability offsets of the upstream
	 * bridge and the GPU via the native pci_get_pciecap_ptr() */
	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hardware autonomous width disable)
			 * state on both ends so it can be restored below */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* if the link came up narrower than detected, try to
			 * renegotiate back to the full detected width */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo
				 * gen3 equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore bit 4 (Enter Compliance) and bits 11:9
				 * (Compliance Preset/De-emphasis) of LNKCTL2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the Target Link Speed field (bits 3:0) of LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* trigger the speed change and wait for hardware to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9770
/**
 * cik_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link controller's L0s/L1 inactivity timers, PLL
 * power-down-in-L1 behavior and related clock selects.  The local
 * disable_* flags are compile-time policy knobs (all enabled here);
 * the whole function is skipped for IGPs, non-PCIe parts, or when the
 * radeon.aspm module parameter is 0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* advertise 0x24 fast training sequences on exit from L0s */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; only written below
	 * once we know whether L1 is enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow both PIF lane-pair PLLs to power down in
			 * the OFF and TXS2 states (value 7 in each field) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
#ifdef zMN_TODO
				/* Linux path: probe the root port's LNKCAP for
				 * CLKPM support; not yet ported to DragonFly */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				/* DragonFly port: clkreq detection disabled,
				 * so the clk_req block below never runs */
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the N_FTS field reads back saturated and the link is
		 * reversed in both directions, drop the L0s inactivity
		 * timer again */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}