nrelease - fix/improve livecd
[dragonfly.git] / sys / dev / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36
37 #define SH_MEM_CONFIG_GFX_DEFAULT \
38         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39
40 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
49
50 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
51 MODULE_FIRMWARE("radeon/bonaire_me.bin");
52 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
53 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
55 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
57 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
58 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
59
60 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
69
70 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
71 MODULE_FIRMWARE("radeon/hawaii_me.bin");
72 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
73 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
75 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
77 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
78 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
79
80 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
86
87 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
88 MODULE_FIRMWARE("radeon/kaveri_me.bin");
89 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
90 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
92 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
93 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
94
95 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
96 MODULE_FIRMWARE("radeon/KABINI_me.bin");
97 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
98 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
99 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
100 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
101
102 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
103 MODULE_FIRMWARE("radeon/kabini_me.bin");
104 MODULE_FIRMWARE("radeon/kabini_ce.bin");
105 MODULE_FIRMWARE("radeon/kabini_mec.bin");
106 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
107 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
108
109 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
115
116 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
117 MODULE_FIRMWARE("radeon/mullins_me.bin");
118 MODULE_FIRMWARE("radeon/mullins_ce.bin");
119 MODULE_FIRMWARE("radeon/mullins_mec.bin");
120 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
121 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122
123 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
124 static void cik_rlc_stop(struct radeon_device *rdev);
125 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
126 static void cik_program_aspm(struct radeon_device *rdev);
127 static void cik_init_pg(struct radeon_device *rdev);
128 static void cik_init_cg(struct radeon_device *rdev);
129 static void cik_fini_pg(struct radeon_device *rdev);
130 static void cik_fini_cg(struct radeon_device *rdev);
131 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
132                                           bool enable);
133
134 /**
135  * cik_get_allowed_info_register - fetch the register for the info ioctl
136  *
137  * @rdev: radeon_device pointer
138  * @reg: register offset in bytes
139  * @val: register value
140  *
141  * Returns 0 for success or -EINVAL for an invalid register
142  *
143  */
144 int cik_get_allowed_info_register(struct radeon_device *rdev,
145                                   u32 reg, u32 *val)
146 {
147         switch (reg) {
148         case GRBM_STATUS:
149         case GRBM_STATUS2:
150         case GRBM_STATUS_SE0:
151         case GRBM_STATUS_SE1:
152         case GRBM_STATUS_SE2:
153         case GRBM_STATUS_SE3:
154         case SRBM_STATUS:
155         case SRBM_STATUS2:
156         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
157         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
158         case UVD_STATUS:
159         /* TODO VCE */
160                 *val = RREG32(reg);
161                 return 0;
162         default:
163                 return -EINVAL;
164         }
165 }
166
167 /*
168  * Indirect registers accessor
169  */
170 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
171 {
172         unsigned long flags;
173         u32 r;
174
175         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
176         WREG32(CIK_DIDT_IND_INDEX, (reg));
177         r = RREG32(CIK_DIDT_IND_DATA);
178         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
179         return r;
180 }
181
182 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
183 {
184         unsigned long flags;
185
186         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
187         WREG32(CIK_DIDT_IND_INDEX, (reg));
188         WREG32(CIK_DIDT_IND_DATA, (v));
189         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
190 }
191
192 /* get temperature in millidegrees */
193 int ci_get_temp(struct radeon_device *rdev)
194 {
195         u32 temp;
196         int actual_temp = 0;
197
198         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
199                 CTF_TEMP_SHIFT;
200
201         if (temp & 0x200)
202                 actual_temp = 255;
203         else
204                 actual_temp = temp & 0x1ff;
205
206         actual_temp = actual_temp * 1000;
207
208         return actual_temp;
209 }
210
211 /* get temperature in millidegrees */
212 int kv_get_temp(struct radeon_device *rdev)
213 {
214         u32 temp;
215         int actual_temp = 0;
216
217         temp = RREG32_SMC(0xC0300E0C);
218
219         if (temp)
220                 actual_temp = (temp / 8) - 49;
221         else
222                 actual_temp = 0;
223
224         actual_temp = actual_temp * 1000;
225
226         return actual_temp;
227 }
228
229 /*
230  * Indirect registers accessor
231  */
232 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
233 {
234         unsigned long flags;
235         u32 r;
236
237         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
238         WREG32(PCIE_INDEX, reg);
239         (void)RREG32(PCIE_INDEX);
240         r = RREG32(PCIE_DATA);
241         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
242         return r;
243 }
244
245 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
246 {
247         unsigned long flags;
248
249         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
250         WREG32(PCIE_INDEX, reg);
251         (void)RREG32(PCIE_INDEX);
252         WREG32(PCIE_DATA, v);
253         (void)RREG32(PCIE_DATA);
254         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
255 }
256
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * Entries come in pairs: the first word packs a selector in the upper
 * 16 bits with a dword register index (byte offset >> 2) in the lower
 * 16 bits; the second word is a zero placeholder.
 * NOTE(review): the bare 0x3 and 0x5 words below appear to be markers
 * that change how the following entries are interpreted (the final
 * five entries carry no placeholder word) — confirm against the RLC
 * microcode interface before relying on this description.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	/* final entries: register words only, no placeholder values */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
703
704 static const u32 kalindi_rlc_save_restore_register_list[] =
705 {
706         (0x0e00 << 16) | (0xc12c >> 2),
707         0x00000000,
708         (0x0e00 << 16) | (0xc140 >> 2),
709         0x00000000,
710         (0x0e00 << 16) | (0xc150 >> 2),
711         0x00000000,
712         (0x0e00 << 16) | (0xc15c >> 2),
713         0x00000000,
714         (0x0e00 << 16) | (0xc168 >> 2),
715         0x00000000,
716         (0x0e00 << 16) | (0xc170 >> 2),
717         0x00000000,
718         (0x0e00 << 16) | (0xc204 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc2b4 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc2b8 >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc2bc >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc2c0 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0x8228 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0x829c >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0x869c >> 2),
733         0x00000000,
734         (0x0600 << 16) | (0x98f4 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0x98f8 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0x9900 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc260 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x90e8 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x3c000 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x3c00c >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x8c1c >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x9700 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0xcd20 >> 2),
753         0x00000000,
754         (0x4e00 << 16) | (0xcd20 >> 2),
755         0x00000000,
756         (0x5e00 << 16) | (0xcd20 >> 2),
757         0x00000000,
758         (0x6e00 << 16) | (0xcd20 >> 2),
759         0x00000000,
760         (0x7e00 << 16) | (0xcd20 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x89bc >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0x8900 >> 2),
765         0x00000000,
766         0x3,
767         (0x0e00 << 16) | (0xc130 >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0xc134 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0xc1fc >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0xc208 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0xc264 >> 2),
776         0x00000000,
777         (0x0e00 << 16) | (0xc268 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc26c >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc270 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc274 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc28c >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc290 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc294 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc298 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc2a0 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc2a4 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc2a8 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc2ac >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x301d0 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x30238 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x30250 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x30254 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x30258 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x3025c >> 2),
812         0x00000000,
813         (0x4e00 << 16) | (0xc900 >> 2),
814         0x00000000,
815         (0x5e00 << 16) | (0xc900 >> 2),
816         0x00000000,
817         (0x6e00 << 16) | (0xc900 >> 2),
818         0x00000000,
819         (0x7e00 << 16) | (0xc900 >> 2),
820         0x00000000,
821         (0x4e00 << 16) | (0xc904 >> 2),
822         0x00000000,
823         (0x5e00 << 16) | (0xc904 >> 2),
824         0x00000000,
825         (0x6e00 << 16) | (0xc904 >> 2),
826         0x00000000,
827         (0x7e00 << 16) | (0xc904 >> 2),
828         0x00000000,
829         (0x4e00 << 16) | (0xc908 >> 2),
830         0x00000000,
831         (0x5e00 << 16) | (0xc908 >> 2),
832         0x00000000,
833         (0x6e00 << 16) | (0xc908 >> 2),
834         0x00000000,
835         (0x7e00 << 16) | (0xc908 >> 2),
836         0x00000000,
837         (0x4e00 << 16) | (0xc90c >> 2),
838         0x00000000,
839         (0x5e00 << 16) | (0xc90c >> 2),
840         0x00000000,
841         (0x6e00 << 16) | (0xc90c >> 2),
842         0x00000000,
843         (0x7e00 << 16) | (0xc90c >> 2),
844         0x00000000,
845         (0x4e00 << 16) | (0xc910 >> 2),
846         0x00000000,
847         (0x5e00 << 16) | (0xc910 >> 2),
848         0x00000000,
849         (0x6e00 << 16) | (0xc910 >> 2),
850         0x00000000,
851         (0x7e00 << 16) | (0xc910 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0xc99c >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x9834 >> 2),
856         0x00000000,
857         (0x0000 << 16) | (0x30f00 >> 2),
858         0x00000000,
859         (0x0000 << 16) | (0x30f04 >> 2),
860         0x00000000,
861         (0x0000 << 16) | (0x30f08 >> 2),
862         0x00000000,
863         (0x0000 << 16) | (0x30f0c >> 2),
864         0x00000000,
865         (0x0600 << 16) | (0x9b7c >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x8a14 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x8a18 >> 2),
870         0x00000000,
871         (0x0600 << 16) | (0x30a00 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x8bf0 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x8bcc >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8b24 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x30a04 >> 2),
880         0x00000000,
881         (0x0600 << 16) | (0x30a10 >> 2),
882         0x00000000,
883         (0x0600 << 16) | (0x30a14 >> 2),
884         0x00000000,
885         (0x0600 << 16) | (0x30a18 >> 2),
886         0x00000000,
887         (0x0600 << 16) | (0x30a2c >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0xc700 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0xc704 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0xc708 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0xc768 >> 2),
896         0x00000000,
897         (0x0400 << 16) | (0xc770 >> 2),
898         0x00000000,
899         (0x0400 << 16) | (0xc774 >> 2),
900         0x00000000,
901         (0x0400 << 16) | (0xc798 >> 2),
902         0x00000000,
903         (0x0400 << 16) | (0xc79c >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x9100 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x3c010 >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x8c00 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x8c04 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x8c20 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x8c38 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x8c3c >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0xae00 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x9604 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0xac08 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0xac0c >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0xac10 >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0xac14 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0xac58 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xac68 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac6c >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac70 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac74 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac78 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac7c >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac80 >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac84 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac88 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac8c >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0x970c >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0x9714 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0x9718 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0x971c >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0x31068 >> 2),
962         0x00000000,
963         (0x4e00 << 16) | (0x31068 >> 2),
964         0x00000000,
965         (0x5e00 << 16) | (0x31068 >> 2),
966         0x00000000,
967         (0x6e00 << 16) | (0x31068 >> 2),
968         0x00000000,
969         (0x7e00 << 16) | (0x31068 >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0xcd10 >> 2),
972         0x00000000,
973         (0x0e00 << 16) | (0xcd14 >> 2),
974         0x00000000,
975         (0x0e00 << 16) | (0x88b0 >> 2),
976         0x00000000,
977         (0x0e00 << 16) | (0x88b4 >> 2),
978         0x00000000,
979         (0x0e00 << 16) | (0x88b8 >> 2),
980         0x00000000,
981         (0x0e00 << 16) | (0x88bc >> 2),
982         0x00000000,
983         (0x0400 << 16) | (0x89c0 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0x88c4 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88c8 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88d0 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88d4 >> 2),
992         0x00000000,
993         (0x0e00 << 16) | (0x88d8 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x8980 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x30938 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x3093c >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x30940 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x89a0 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x30900 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x30904 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x89b4 >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x3e1fc >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x3c210 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x3c214 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x3c218 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x8904 >> 2),
1020         0x00000000,
1021         0x5,
1022         (0x0e00 << 16) | (0x8c28 >> 2),
1023         (0x0e00 << 16) | (0x8c2c >> 2),
1024         (0x0e00 << 16) | (0x8c30 >> 2),
1025         (0x0e00 << 16) | (0x8c34 >> 2),
1026         (0x0e00 << 16) | (0x9600 >> 2),
1027 };
1028
/* Bonaire SPM golden settings; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1033
/* Bonaire common golden settings; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1041
/* Bonaire golden register settings; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1086
/* Bonaire medium/coarse grain clock gating init; presumably
 * {offset, and_mask, or_mask} triples consumed by
 * radeon_program_register_sequence() — verify. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1172
/* Spectre (Kaveri) SPM golden settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1177
/* Spectre (Kaveri) common golden settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1185
/* Spectre (Kaveri) golden register settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1214
/* Spectre (Kaveri) clock gating init; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1305
/* Kalindi (Kabini) SPM golden settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1310
/* Kalindi (Kabini) common golden settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1318
/* Kalindi (Kabini) golden register settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1352
/* Kalindi (Kabini/Mullins) clock gating init; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1411
/* Hawaii SPM golden settings; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1416
/* Hawaii common golden settings; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1425
/* Hawaii golden register settings; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1465
/* Hawaii clock gating init; presumably {offset, and_mask, or_mask}
 * triples consumed by radeon_program_register_sequence() — verify. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1576
/* Godavari (Mullins) golden register settings; presumably {offset, and_mask,
 * or_mask} triples consumed by radeon_program_register_sequence() — verify. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned and looks like a typo for
	 * 0x9834 (kalindi uses 0x9834, 0xf00fffff, 0x00000400) — verify against
	 * the register spec before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1612
1613
1614 static void cik_init_golden_registers(struct radeon_device *rdev)
1615 {
1616         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1617         mutex_lock(&rdev->grbm_idx_mutex);
1618         switch (rdev->family) {
1619         case CHIP_BONAIRE:
1620                 radeon_program_register_sequence(rdev,
1621                                                  bonaire_mgcg_cgcg_init,
1622                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1623                 radeon_program_register_sequence(rdev,
1624                                                  bonaire_golden_registers,
1625                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1626                 radeon_program_register_sequence(rdev,
1627                                                  bonaire_golden_common_registers,
1628                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1629                 radeon_program_register_sequence(rdev,
1630                                                  bonaire_golden_spm_registers,
1631                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1632                 break;
1633         case CHIP_KABINI:
1634                 radeon_program_register_sequence(rdev,
1635                                                  kalindi_mgcg_cgcg_init,
1636                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1637                 radeon_program_register_sequence(rdev,
1638                                                  kalindi_golden_registers,
1639                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1640                 radeon_program_register_sequence(rdev,
1641                                                  kalindi_golden_common_registers,
1642                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1643                 radeon_program_register_sequence(rdev,
1644                                                  kalindi_golden_spm_registers,
1645                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1646                 break;
1647         case CHIP_MULLINS:
1648                 radeon_program_register_sequence(rdev,
1649                                                  kalindi_mgcg_cgcg_init,
1650                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1651                 radeon_program_register_sequence(rdev,
1652                                                  godavari_golden_registers,
1653                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1654                 radeon_program_register_sequence(rdev,
1655                                                  kalindi_golden_common_registers,
1656                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1657                 radeon_program_register_sequence(rdev,
1658                                                  kalindi_golden_spm_registers,
1659                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1660                 break;
1661         case CHIP_KAVERI:
1662                 radeon_program_register_sequence(rdev,
1663                                                  spectre_mgcg_cgcg_init,
1664                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1665                 radeon_program_register_sequence(rdev,
1666                                                  spectre_golden_registers,
1667                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1668                 radeon_program_register_sequence(rdev,
1669                                                  spectre_golden_common_registers,
1670                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1671                 radeon_program_register_sequence(rdev,
1672                                                  spectre_golden_spm_registers,
1673                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1674                 break;
1675         case CHIP_HAWAII:
1676                 radeon_program_register_sequence(rdev,
1677                                                  hawaii_mgcg_cgcg_init,
1678                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1679                 radeon_program_register_sequence(rdev,
1680                                                  hawaii_golden_registers,
1681                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1682                 radeon_program_register_sequence(rdev,
1683                                                  hawaii_golden_common_registers,
1684                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1685                 radeon_program_register_sequence(rdev,
1686                                                  hawaii_golden_spm_registers,
1687                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1688                 break;
1689         default:
1690                 break;
1691         }
1692         mutex_unlock(&rdev->grbm_idx_mutex);
1693 }
1694
1695 /**
1696  * cik_get_xclk - get the xclk
1697  *
1698  * @rdev: radeon_device pointer
1699  *
1700  * Returns the reference clock used by the gfx engine
1701  * (CIK).
1702  */
1703 u32 cik_get_xclk(struct radeon_device *rdev)
1704 {
1705         u32 reference_clock = rdev->clock.spll.reference_freq;
1706
1707         if (rdev->flags & RADEON_IS_IGP) {
1708                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1709                         return reference_clock / 2;
1710         } else {
1711                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1712                         return reference_clock / 4;
1713         }
1714         return reference_clock;
1715 }
1716
1717 /**
1718  * cik_mm_rdoorbell - read a doorbell dword
1719  *
1720  * @rdev: radeon_device pointer
1721  * @index: doorbell index
1722  *
1723  * Returns the value in the doorbell aperture at the
1724  * requested doorbell index (CIK).
1725  */
1726 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1727 {
1728         if (index < rdev->doorbell.num_doorbells) {
1729                 return readl(rdev->doorbell.ptr + index);
1730         } else {
1731                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1732                 return 0;
1733         }
1734 }
1735
1736 /**
1737  * cik_mm_wdoorbell - write a doorbell dword
1738  *
1739  * @rdev: radeon_device pointer
1740  * @index: doorbell index
1741  * @v: value to write
1742  *
1743  * Writes @v to the doorbell aperture at the
1744  * requested doorbell index (CIK).
1745  */
1746 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1747 {
1748         if (index < rdev->doorbell.num_doorbells) {
1749                 writel(v, rdev->doorbell.ptr + index);
1750         } else {
1751                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1752         }
1753 }
1754
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC IO debug (index, value) pairs.  Written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before programming the legacy (non-validated) MC ucode image.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1796
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Hawaii MC IO debug (index, value) pairs.  Written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before programming the legacy (non-validated) MC ucode image.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1824
1825
1826 /**
1827  * cik_srbm_select - select specific register instances
1828  *
1829  * @rdev: radeon_device pointer
1830  * @me: selected ME (micro engine)
1831  * @pipe: pipe
1832  * @queue: queue
1833  * @vmid: VMID
1834  *
1835  * Switches the currently active registers instances.  Some
1836  * registers are instanced per VMID, others are instanced per
1837  * me/pipe/queue combination.
1838  */
1839 static void cik_srbm_select(struct radeon_device *rdev,
1840                             u32 me, u32 pipe, u32 queue, u32 vmid)
1841 {
1842         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1843                              MEID(me & 0x3) |
1844                              VMID(vmid & 0xf) |
1845                              QUEUEID(queue & 0x7));
1846         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1847 }
1848
1849 /* ucode loading */
1850 /**
1851  * ci_mc_load_microcode - load MC ucode into the hw
1852  *
1853  * @rdev: radeon_device pointer
1854  *
1855  * Load the GDDR MC ucode into the hw (CIK).
1856  * Returns 0 on success, error on failure.
1857  */
1858 int ci_mc_load_microcode(struct radeon_device *rdev)
1859 {
1860         const __be32 *fw_data = NULL;
1861         const __le32 *new_fw_data = NULL;
1862         u32 running, tmp;
1863         u32 *io_mc_regs = NULL;
1864         const __le32 *new_io_mc_regs = NULL;
1865         int i, regs_size, ucode_size;
1866
1867         if (!rdev->mc_fw)
1868                 return -EINVAL;
1869
1870         if (rdev->new_fw) {
1871                 const struct mc_firmware_header_v1_0 *hdr =
1872                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1873
1874                 radeon_ucode_print_mc_hdr(&hdr->header);
1875
1876                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1877                 new_io_mc_regs = (const __le32 *)
1878                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1879                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1880                 new_fw_data = (const __le32 *)
1881                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1882         } else {
1883                 ucode_size = rdev->mc_fw->datasize / 4;
1884
1885                 switch (rdev->family) {
1886                 case CHIP_BONAIRE:
1887                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1888                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1889                         break;
1890                 case CHIP_HAWAII:
1891                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1892                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1893                         break;
1894                 default:
1895                         return -EINVAL;
1896                 }
1897                 fw_data = (const __be32 *)rdev->mc_fw->data;
1898         }
1899
1900         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1901
1902         if (running == 0) {
1903                 /* reset the engine and set to writable */
1904                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1905                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1906
1907                 /* load mc io regs */
1908                 for (i = 0; i < regs_size; i++) {
1909                         if (rdev->new_fw) {
1910                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1911                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1912                         } else {
1913                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1914                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1915                         }
1916                 }
1917
1918                 tmp = RREG32(MC_SEQ_MISC0);
1919                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1920                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1921                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1922                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1923                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1924                 }
1925
1926                 /* load the MC ucode */
1927                 for (i = 0; i < ucode_size; i++) {
1928                         if (rdev->new_fw)
1929                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1930                         else
1931                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1932                 }
1933
1934                 /* put the engine back into the active state */
1935                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1936                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1937                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1938
1939                 /* wait for training to complete */
1940                 for (i = 0; i < rdev->usec_timeout; i++) {
1941                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1942                                 break;
1943                         udelay(1);
1944                 }
1945                 for (i = 0; i < rdev->usec_timeout; i++) {
1946                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1947                                 break;
1948                         udelay(1);
1949                 }
1950         }
1951
1952         return 0;
1953 }
1954
1955 /**
1956  * cik_init_microcode - load ucode images from disk
1957  *
1958  * @rdev: radeon_device pointer
1959  *
1960  * Use the firmware interface to load the ucode images into
1961  * the driver (not loaded into hw).
1962  * Returns 0 on success, error on failure.
1963  */
1964 static int cik_init_microcode(struct radeon_device *rdev)
1965 {
1966         const char *chip_name;
1967         const char *new_chip_name;
1968         size_t pfp_req_size, me_req_size, ce_req_size,
1969                 mec_req_size, rlc_req_size, mc_req_size = 0,
1970                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1971         char fw_name[30];
1972         int new_fw = 0;
1973         int err;
1974         int num_fw;
1975         bool new_smc = false;
1976
1977         DRM_DEBUG("\n");
1978
1979         switch (rdev->family) {
1980         case CHIP_BONAIRE:
1981                 chip_name = "BONAIRE";
1982                 if ((rdev->pdev->revision == 0x80) ||
1983                     (rdev->pdev->revision == 0x81) ||
1984                     (rdev->pdev->device == 0x665f))
1985                         new_smc = true;
1986                 new_chip_name = "bonaire";
1987                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1988                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1989                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1990                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1991                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1992                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1993                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1994                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1995                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1996                 num_fw = 8;
1997                 break;
1998         case CHIP_HAWAII:
1999                 chip_name = "HAWAII";
2000                 if (rdev->pdev->revision == 0x80)
2001                         new_smc = true;
2002                 new_chip_name = "hawaii";
2003                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2005                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2008                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2009                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2010                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2011                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2012                 num_fw = 8;
2013                 break;
2014         case CHIP_KAVERI:
2015                 chip_name = "KAVERI";
2016                 new_chip_name = "kaveri";
2017                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2019                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2022                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023                 num_fw = 7;
2024                 break;
2025         case CHIP_KABINI:
2026                 chip_name = "KABINI";
2027                 new_chip_name = "kabini";
2028                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2033                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                 num_fw = 6;
2035                 break;
2036         case CHIP_MULLINS:
2037                 chip_name = "MULLINS";
2038                 new_chip_name = "mullins";
2039                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2044                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                 num_fw = 6;
2046                 break;
2047         default: BUG();
2048         }
2049
2050         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2051
2052         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2053         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2054         if (err) {
2055                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2056                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2057                 if (err)
2058                         goto out;
2059                 if (rdev->pfp_fw->datasize != pfp_req_size) {
2060                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2061                                rdev->pfp_fw->datasize, fw_name);
2062                         err = -EINVAL;
2063                         goto out;
2064                 }
2065         } else {
2066                 err = radeon_ucode_validate(rdev->pfp_fw);
2067                 if (err) {
2068                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2069                                fw_name);
2070                         goto out;
2071                 } else {
2072                         new_fw++;
2073                 }
2074         }
2075
2076         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2077         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2078         if (err) {
2079                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2080                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2081                 if (err)
2082                         goto out;
2083                 if (rdev->me_fw->datasize != me_req_size) {
2084                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2085                                rdev->me_fw->datasize, fw_name);
2086                         err = -EINVAL;
2087                 }
2088         } else {
2089                 err = radeon_ucode_validate(rdev->me_fw);
2090                 if (err) {
2091                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2092                                fw_name);
2093                         goto out;
2094                 } else {
2095                         new_fw++;
2096                 }
2097         }
2098
2099         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2100         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2101         if (err) {
2102                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2103                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2104                 if (err)
2105                         goto out;
2106                 if (rdev->ce_fw->datasize != ce_req_size) {
2107                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2108                                rdev->ce_fw->datasize, fw_name);
2109                         err = -EINVAL;
2110                 }
2111         } else {
2112                 err = radeon_ucode_validate(rdev->ce_fw);
2113                 if (err) {
2114                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2115                                fw_name);
2116                         goto out;
2117                 } else {
2118                         new_fw++;
2119                 }
2120         }
2121
2122         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2123         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2124         if (err) {
2125                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2126                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2127                 if (err)
2128                         goto out;
2129                 if (rdev->mec_fw->datasize != mec_req_size) {
2130                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2131                                rdev->mec_fw->datasize, fw_name);
2132                         err = -EINVAL;
2133                 }
2134         } else {
2135                 err = radeon_ucode_validate(rdev->mec_fw);
2136                 if (err) {
2137                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2138                                fw_name);
2139                         goto out;
2140                 } else {
2141                         new_fw++;
2142                 }
2143         }
2144
2145         if (rdev->family == CHIP_KAVERI) {
2146                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2147                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2148                 if (err) {
2149                         goto out;
2150                 } else {
2151                         err = radeon_ucode_validate(rdev->mec2_fw);
2152                         if (err) {
2153                                 goto out;
2154                         } else {
2155                                 new_fw++;
2156                         }
2157                 }
2158         }
2159
2160         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2161         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2162         if (err) {
2163                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2164                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2165                 if (err)
2166                         goto out;
2167                 if (rdev->rlc_fw->datasize != rlc_req_size) {
2168                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2169                                rdev->rlc_fw->datasize, fw_name);
2170                         err = -EINVAL;
2171                 }
2172         } else {
2173                 err = radeon_ucode_validate(rdev->rlc_fw);
2174                 if (err) {
2175                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2176                                fw_name);
2177                         goto out;
2178                 } else {
2179                         new_fw++;
2180                 }
2181         }
2182
2183         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2184         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2185         if (err) {
2186                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2187                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2188                 if (err)
2189                         goto out;
2190                 if (rdev->sdma_fw->datasize != sdma_req_size) {
2191                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2192                                rdev->sdma_fw->datasize, fw_name);
2193                         err = -EINVAL;
2194                 }
2195         } else {
2196                 err = radeon_ucode_validate(rdev->sdma_fw);
2197                 if (err) {
2198                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2199                                fw_name);
2200                         goto out;
2201                 } else {
2202                         new_fw++;
2203                 }
2204         }
2205
2206         /* No SMC, MC ucode on APUs */
2207         if (!(rdev->flags & RADEON_IS_IGP)) {
2208                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2209                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2210                 if (err) {
2211                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2212                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2213                         if (err) {
2214                                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2215                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2216                                 if (err)
2217                                         goto out;
2218                         }
2219                         if ((rdev->mc_fw->datasize != mc_req_size) &&
2220                             (rdev->mc_fw->datasize != mc2_req_size)){
2221                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2222                                        rdev->mc_fw->datasize, fw_name);
2223                                 err = -EINVAL;
2224                         }
2225                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2226                 } else {
2227                         err = radeon_ucode_validate(rdev->mc_fw);
2228                         if (err) {
2229                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2230                                        fw_name);
2231                                 goto out;
2232                         } else {
2233                                 new_fw++;
2234                         }
2235                 }
2236
2237                 if (new_smc)
2238                         ksnprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_k_smc", new_chip_name);
2239                 else
2240                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2241                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242                 if (err) {
2243                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2244                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2245                         if (err) {
2246                                 pr_err("smc: error loading firmware \"%s\"\n",
2247                                        fw_name);
2248                                 release_firmware(rdev->smc_fw);
2249                                 rdev->smc_fw = NULL;
2250                                 err = 0;
2251                         } else if (rdev->smc_fw->datasize != smc_req_size) {
2252                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2253                                        rdev->smc_fw->datasize, fw_name);
2254                                 err = -EINVAL;
2255                         }
2256                 } else {
2257                         err = radeon_ucode_validate(rdev->smc_fw);
2258                         if (err) {
2259                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2260                                        fw_name);
2261                                 goto out;
2262                         } else {
2263                                 new_fw++;
2264                         }
2265                 }
2266         }
2267
2268         if (new_fw == 0) {
2269                 rdev->new_fw = false;
2270         } else if (new_fw < num_fw) {
2271                 pr_err("ci_fw: mixing new and old firmware!\n");
2272                 err = -EINVAL;
2273         } else {
2274                 rdev->new_fw = true;
2275         }
2276
2277 out:
2278         if (err) {
2279                 if (err != -EINVAL)
2280                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2281                                fw_name);
2282                 release_firmware(rdev->pfp_fw);
2283                 rdev->pfp_fw = NULL;
2284                 release_firmware(rdev->me_fw);
2285                 rdev->me_fw = NULL;
2286                 release_firmware(rdev->ce_fw);
2287                 rdev->ce_fw = NULL;
2288                 release_firmware(rdev->mec_fw);
2289                 rdev->mec_fw = NULL;
2290                 release_firmware(rdev->mec2_fw);
2291                 rdev->mec2_fw = NULL;
2292                 release_firmware(rdev->rlc_fw);
2293                 rdev->rlc_fw = NULL;
2294                 release_firmware(rdev->sdma_fw);
2295                 rdev->sdma_fw = NULL;
2296                 release_firmware(rdev->mc_fw);
2297                 rdev->mc_fw = NULL;
2298                 release_firmware(rdev->smc_fw);
2299                 rdev->smc_fw = NULL;
2300         }
2301         return err;
2302 }
2303
2304 /**
2305  * cik_fini_microcode - drop the firmwares image references
2306  *
2307  * @rdev: radeon_device pointer
2308  *
2309  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2310  * Called at driver shutdown.
2311  */
2312 static void cik_fini_microcode(struct radeon_device *rdev)
2313 {
2314         release_firmware(rdev->pfp_fw);
2315         rdev->pfp_fw = NULL;
2316         release_firmware(rdev->me_fw);
2317         rdev->me_fw = NULL;
2318         release_firmware(rdev->ce_fw);
2319         rdev->ce_fw = NULL;
2320         release_firmware(rdev->mec_fw);
2321         rdev->mec_fw = NULL;
2322         release_firmware(rdev->mec2_fw);
2323         rdev->mec2_fw = NULL;
2324         release_firmware(rdev->rlc_fw);
2325         rdev->rlc_fw = NULL;
2326         release_firmware(rdev->sdma_fw);
2327         rdev->sdma_fw = NULL;
2328         release_firmware(rdev->mc_fw);
2329         rdev->mc_fw = NULL;
2330         release_firmware(rdev->smc_fw);
2331         rdev->smc_fw = NULL;
2332 }
2333
2334 /*
2335  * Core functions
2336  */
2337 /**
2338  * cik_tiling_mode_table_init - init the hw tiling table
2339  *
2340  * @rdev: radeon_device pointer
2341  *
2342  * Starting with SI, the tiling setup is done globally in a
2343  * set of 32 tiling modes.  Rather than selecting each set of
2344  * parameters per surface as on older asics, we just select
2345  * which index in the tiling table we want to use, and the
2346  * surface uses those parameters (CIK).
2347  */
2348 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349 {
2350         u32 *tile = rdev->config.cik.tile_mode_array;
2351         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352         const u32 num_tile_mode_states =
2353                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354         const u32 num_secondary_tile_mode_states =
2355                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356         u32 reg_offset, split_equal_to_row_size;
2357         u32 num_pipe_configs;
2358         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359                 rdev->config.cik.max_shader_engines;
2360
2361         switch (rdev->config.cik.mem_row_size_in_kb) {
2362         case 1:
2363                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364                 break;
2365         case 2:
2366         default:
2367                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368                 break;
2369         case 4:
2370                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371                 break;
2372         }
2373
2374         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375         if (num_pipe_configs > 8)
2376                 num_pipe_configs = 16;
2377
2378         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379                 tile[reg_offset] = 0;
2380         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381                 macrotile[reg_offset] = 0;
2382
2383         switch(num_pipe_configs) {
2384         case 16:
2385                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                            TILE_SPLIT(split_equal_to_row_size));
2405                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                            TILE_SPLIT(split_equal_to_row_size));
2416                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463
2464                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                            NUM_BANKS(ADDR_SURF_16_BANK));
2468                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                            NUM_BANKS(ADDR_SURF_16_BANK));
2472                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK));
2476                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                            NUM_BANKS(ADDR_SURF_16_BANK));
2480                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                            NUM_BANKS(ADDR_SURF_8_BANK));
2484                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                            NUM_BANKS(ADDR_SURF_4_BANK));
2488                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                            NUM_BANKS(ADDR_SURF_2_BANK));
2492                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                            NUM_BANKS(ADDR_SURF_16_BANK));
2496                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499                            NUM_BANKS(ADDR_SURF_16_BANK));
2500                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                             NUM_BANKS(ADDR_SURF_16_BANK));
2504                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                             NUM_BANKS(ADDR_SURF_8_BANK));
2508                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                             NUM_BANKS(ADDR_SURF_4_BANK));
2512                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                             NUM_BANKS(ADDR_SURF_2_BANK));
2516                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                             NUM_BANKS(ADDR_SURF_2_BANK));
2520
2521                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525                 break;
2526
2527         case 8:
2528                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                            TILE_SPLIT(split_equal_to_row_size));
2548                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                            TILE_SPLIT(split_equal_to_row_size));
2559                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606
2607                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                 NUM_BANKS(ADDR_SURF_16_BANK));
2611                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614                                 NUM_BANKS(ADDR_SURF_16_BANK));
2615                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                 NUM_BANKS(ADDR_SURF_16_BANK));
2619                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                 NUM_BANKS(ADDR_SURF_8_BANK));
2627                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                 NUM_BANKS(ADDR_SURF_4_BANK));
2631                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                 NUM_BANKS(ADDR_SURF_2_BANK));
2635                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                 NUM_BANKS(ADDR_SURF_8_BANK));
2655                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_4_BANK));
2659                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_2_BANK));
2663
2664                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668                 break;
2669
2670         case 4:
2671                 if (num_rbs == 4) {
2672                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                            TILE_SPLIT(split_equal_to_row_size));
2692                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                            TILE_SPLIT(split_equal_to_row_size));
2703                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750
2751                 } else if (num_rbs < 4) {
2752                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                            TILE_SPLIT(split_equal_to_row_size));
2772                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                            TILE_SPLIT(split_equal_to_row_size));
2783                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 }
2831
2832                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_16_BANK));
2848                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_8_BANK));
2856                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859                                 NUM_BANKS(ADDR_SURF_4_BANK));
2860                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_16_BANK));
2868                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                 NUM_BANKS(ADDR_SURF_8_BANK));
2884                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887                                 NUM_BANKS(ADDR_SURF_4_BANK));
2888
2889                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893                 break;
2894
2895         case 2:
2896                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                            PIPE_CONFIG(ADDR_SURF_P2) |
2899                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914                            PIPE_CONFIG(ADDR_SURF_P2) |
2915                            TILE_SPLIT(split_equal_to_row_size));
2916                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                            PIPE_CONFIG(ADDR_SURF_P2) |
2918                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921                            PIPE_CONFIG(ADDR_SURF_P2) |
2922                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925                            PIPE_CONFIG(ADDR_SURF_P2) |
2926                            TILE_SPLIT(split_equal_to_row_size));
2927                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928                            PIPE_CONFIG(ADDR_SURF_P2);
2929                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931                            PIPE_CONFIG(ADDR_SURF_P2));
2932                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                             PIPE_CONFIG(ADDR_SURF_P2) |
2946                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961                             PIPE_CONFIG(ADDR_SURF_P2));
2962                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964                             PIPE_CONFIG(ADDR_SURF_P2) |
2965                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968                             PIPE_CONFIG(ADDR_SURF_P2) |
2969                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                             PIPE_CONFIG(ADDR_SURF_P2) |
2973                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974
2975                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                 NUM_BANKS(ADDR_SURF_16_BANK));
2979                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                 NUM_BANKS(ADDR_SURF_16_BANK));
2983                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                 NUM_BANKS(ADDR_SURF_16_BANK));
2987                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                 NUM_BANKS(ADDR_SURF_16_BANK));
2991                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                 NUM_BANKS(ADDR_SURF_16_BANK));
2999                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                 NUM_BANKS(ADDR_SURF_8_BANK));
3003                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                 NUM_BANKS(ADDR_SURF_16_BANK));
3007                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                 NUM_BANKS(ADDR_SURF_16_BANK));
3011                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                 NUM_BANKS(ADDR_SURF_16_BANK));
3023                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030                                 NUM_BANKS(ADDR_SURF_8_BANK));
3031
3032                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036                 break;
3037
3038         default:
3039                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040         }
3041 }
3042
3043 /**
3044  * cik_select_se_sh - select which SE, SH to address
3045  *
3046  * @rdev: radeon_device pointer
3047  * @se_num: shader engine to address
3048  * @sh_num: sh block to address
3049  *
3050  * Select which SE, SH combinations to address. Certain
3051  * registers are instanced per SE or SH.  0xffffffff means
3052  * broadcast to all SEs or SHs (CIK).
3053  */
3054 static void cik_select_se_sh(struct radeon_device *rdev,
3055                              u32 se_num, u32 sh_num)
3056 {
3057         u32 data = INSTANCE_BROADCAST_WRITES;
3058
3059         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061         else if (se_num == 0xffffffff)
3062                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063         else if (sh_num == 0xffffffff)
3064                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065         else
3066                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067         WREG32(GRBM_GFX_INDEX, data);
3068 }
3069
3070 /**
3071  * cik_create_bitmask - create a bitmask
3072  *
3073  * @bit_width: length of the mask
3074  *
3075  * create a variable length bit mask (CIK).
3076  * Returns the bitmask.
3077  */
3078 static u32 cik_create_bitmask(u32 bit_width)
3079 {
3080         u32 i, mask = 0;
3081
3082         for (i = 0; i < bit_width; i++) {
3083                 mask <<= 1;
3084                 mask |= 1;
3085         }
3086         return mask;
3087 }
3088
3089 /**
3090  * cik_get_rb_disabled - computes the mask of disabled RBs
3091  *
3092  * @rdev: radeon_device pointer
3093  * @max_rb_num: max RBs (render backends) for the asic
3094  * @se_num: number of SEs (shader engines) for the asic
3095  * @sh_per_se: number of SH blocks per SE for the asic
3096  *
3097  * Calculates the bitmask of disabled RBs (CIK).
3098  * Returns the disabled RB bitmask.
3099  */
3100 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3101                               u32 max_rb_num_per_se,
3102                               u32 sh_per_se)
3103 {
3104         u32 data, mask;
3105
3106         data = RREG32(CC_RB_BACKEND_DISABLE);
3107         if (data & 1)
3108                 data &= BACKEND_DISABLE_MASK;
3109         else
3110                 data = 0;
3111         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3112
3113         data >>= BACKEND_DISABLE_SHIFT;
3114
3115         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3116
3117         return data & mask;
3118 }
3119
3120 /**
3121  * cik_setup_rb - setup the RBs on the asic
3122  *
3123  * @rdev: radeon_device pointer
3124  * @se_num: number of SEs (shader engines) for the asic
3125  * @sh_per_se: number of SH blocks per SE for the asic
3126  * @max_rb_num: max RBs (render backends) for the asic
3127  *
3128  * Configures per-SE/SH RB registers (CIK).
3129  */
3130 static void cik_setup_rb(struct radeon_device *rdev,
3131                          u32 se_num, u32 sh_per_se,
3132                          u32 max_rb_num_per_se)
3133 {
3134         int i, j;
3135         u32 data, mask;
3136         u32 disabled_rbs = 0;
3137         u32 enabled_rbs = 0;
3138
3139         mutex_lock(&rdev->grbm_idx_mutex);
3140         for (i = 0; i < se_num; i++) {
3141                 for (j = 0; j < sh_per_se; j++) {
3142                         cik_select_se_sh(rdev, i, j);
3143                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3144                         if (rdev->family == CHIP_HAWAII)
3145                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3146                         else
3147                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3148                 }
3149         }
3150         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3151         mutex_unlock(&rdev->grbm_idx_mutex);
3152
3153         mask = 1;
3154         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3155                 if (!(disabled_rbs & mask))
3156                         enabled_rbs |= mask;
3157                 mask <<= 1;
3158         }
3159
3160         rdev->config.cik.backend_enable_mask = enabled_rbs;
3161
3162         mutex_lock(&rdev->grbm_idx_mutex);
3163         for (i = 0; i < se_num; i++) {
3164                 cik_select_se_sh(rdev, i, 0xffffffff);
3165                 data = 0;
3166                 for (j = 0; j < sh_per_se; j++) {
3167                         switch (enabled_rbs & 3) {
3168                         case 0:
3169                                 if (j == 0)
3170                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3171                                 else
3172                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3173                                 break;
3174                         case 1:
3175                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3176                                 break;
3177                         case 2:
3178                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3179                                 break;
3180                         case 3:
3181                         default:
3182                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3183                                 break;
3184                         }
3185                         enabled_rbs >>= 2;
3186                 }
3187                 WREG32(PA_SC_RASTER_CONFIG, data);
3188         }
3189         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3190         mutex_unlock(&rdev->grbm_idx_mutex);
3191 }
3192
3193 /**
3194  * cik_gpu_init - setup the 3D engine
3195  *
3196  * @rdev: radeon_device pointer
3197  *
3198  * Configures the 3D engine and tiling configuration
3199  * registers so that the 3D engine is usable.
3200  */
3201 static void cik_gpu_init(struct radeon_device *rdev)
3202 {
3203         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204         u32 mc_shared_chmap, mc_arb_ramcfg;
3205         u32 hdp_host_path_cntl;
3206         u32 tmp;
3207         int i, j;
3208
3209         switch (rdev->family) {
3210         case CHIP_BONAIRE:
3211                 rdev->config.cik.max_shader_engines = 2;
3212                 rdev->config.cik.max_tile_pipes = 4;
3213                 rdev->config.cik.max_cu_per_sh = 7;
3214                 rdev->config.cik.max_sh_per_se = 1;
3215                 rdev->config.cik.max_backends_per_se = 2;
3216                 rdev->config.cik.max_texture_channel_caches = 4;
3217                 rdev->config.cik.max_gprs = 256;
3218                 rdev->config.cik.max_gs_threads = 32;
3219                 rdev->config.cik.max_hw_contexts = 8;
3220
3221                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226                 break;
3227         case CHIP_HAWAII:
3228                 rdev->config.cik.max_shader_engines = 4;
3229                 rdev->config.cik.max_tile_pipes = 16;
3230                 rdev->config.cik.max_cu_per_sh = 11;
3231                 rdev->config.cik.max_sh_per_se = 1;
3232                 rdev->config.cik.max_backends_per_se = 4;
3233                 rdev->config.cik.max_texture_channel_caches = 16;
3234                 rdev->config.cik.max_gprs = 256;
3235                 rdev->config.cik.max_gs_threads = 32;
3236                 rdev->config.cik.max_hw_contexts = 8;
3237
3238                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243                 break;
3244         case CHIP_KAVERI:
3245                 rdev->config.cik.max_shader_engines = 1;
3246                 rdev->config.cik.max_tile_pipes = 4;
3247                 rdev->config.cik.max_cu_per_sh = 8;
3248                 rdev->config.cik.max_backends_per_se = 2;
3249                 rdev->config.cik.max_sh_per_se = 1;
3250                 rdev->config.cik.max_texture_channel_caches = 4;
3251                 rdev->config.cik.max_gprs = 256;
3252                 rdev->config.cik.max_gs_threads = 16;
3253                 rdev->config.cik.max_hw_contexts = 8;
3254
3255                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260                 break;
3261         case CHIP_KABINI:
3262         case CHIP_MULLINS:
3263         default:
3264                 rdev->config.cik.max_shader_engines = 1;
3265                 rdev->config.cik.max_tile_pipes = 2;
3266                 rdev->config.cik.max_cu_per_sh = 2;
3267                 rdev->config.cik.max_sh_per_se = 1;
3268                 rdev->config.cik.max_backends_per_se = 1;
3269                 rdev->config.cik.max_texture_channel_caches = 2;
3270                 rdev->config.cik.max_gprs = 256;
3271                 rdev->config.cik.max_gs_threads = 16;
3272                 rdev->config.cik.max_hw_contexts = 8;
3273
3274                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3275                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3276                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3277                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3278                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3279                 break;
3280         }
3281
3282         /* Initialize HDP */
3283         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3284                 WREG32((0x2c14 + j), 0x00000000);
3285                 WREG32((0x2c18 + j), 0x00000000);
3286                 WREG32((0x2c1c + j), 0x00000000);
3287                 WREG32((0x2c20 + j), 0x00000000);
3288                 WREG32((0x2c24 + j), 0x00000000);
3289         }
3290
3291         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3292         WREG32(SRBM_INT_CNTL, 0x1);
3293         WREG32(SRBM_INT_ACK, 0x1);
3294
3295         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3296
3297         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3298         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3299
3300         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3301         rdev->config.cik.mem_max_burst_length_bytes = 256;
3302         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3303         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3304         if (rdev->config.cik.mem_row_size_in_kb > 4)
3305                 rdev->config.cik.mem_row_size_in_kb = 4;
3306         /* XXX use MC settings? */
3307         rdev->config.cik.shader_engine_tile_size = 32;
3308         rdev->config.cik.num_gpus = 1;
3309         rdev->config.cik.multi_gpu_tile_size = 64;
3310
3311         /* fix up row size */
3312         gb_addr_config &= ~ROW_SIZE_MASK;
3313         switch (rdev->config.cik.mem_row_size_in_kb) {
3314         case 1:
3315         default:
3316                 gb_addr_config |= ROW_SIZE(0);
3317                 break;
3318         case 2:
3319                 gb_addr_config |= ROW_SIZE(1);
3320                 break;
3321         case 4:
3322                 gb_addr_config |= ROW_SIZE(2);
3323                 break;
3324         }
3325
3326         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3327          * not have bank info, so create a custom tiling dword.
3328          * bits 3:0   num_pipes
3329          * bits 7:4   num_banks
3330          * bits 11:8  group_size
3331          * bits 15:12 row_size
3332          */
3333         rdev->config.cik.tile_config = 0;
3334         switch (rdev->config.cik.num_tile_pipes) {
3335         case 1:
3336                 rdev->config.cik.tile_config |= (0 << 0);
3337                 break;
3338         case 2:
3339                 rdev->config.cik.tile_config |= (1 << 0);
3340                 break;
3341         case 4:
3342                 rdev->config.cik.tile_config |= (2 << 0);
3343                 break;
3344         case 8:
3345         default:
3346                 /* XXX what about 12? */
3347                 rdev->config.cik.tile_config |= (3 << 0);
3348                 break;
3349         }
3350         rdev->config.cik.tile_config |=
3351                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3352         rdev->config.cik.tile_config |=
3353                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3354         rdev->config.cik.tile_config |=
3355                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3356
3357         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3358         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3359         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3360         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3361         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3362         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3363         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3364         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3365
3366         cik_tiling_mode_table_init(rdev);
3367
3368         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3369                      rdev->config.cik.max_sh_per_se,
3370                      rdev->config.cik.max_backends_per_se);
3371
3372         rdev->config.cik.active_cus = 0;
3373         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3374                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3375                         rdev->config.cik.active_cus +=
3376                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3377                 }
3378         }
3379
3380         /* set HW defaults for 3D engine */
3381         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3382
3383         mutex_lock(&rdev->grbm_idx_mutex);
3384         /*
3385          * making sure that the following register writes will be broadcasted
3386          * to all the shaders
3387          */
3388         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3389         WREG32(SX_DEBUG_1, 0x20);
3390
3391         WREG32(TA_CNTL_AUX, 0x00010000);
3392
3393         tmp = RREG32(SPI_CONFIG_CNTL);
3394         tmp |= 0x03000000;
3395         WREG32(SPI_CONFIG_CNTL, tmp);
3396
3397         WREG32(SQ_CONFIG, 1);
3398
3399         WREG32(DB_DEBUG, 0);
3400
3401         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3402         tmp |= 0x00000400;
3403         WREG32(DB_DEBUG2, tmp);
3404
3405         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3406         tmp |= 0x00020200;
3407         WREG32(DB_DEBUG3, tmp);
3408
3409         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3410         tmp |= 0x00018208;
3411         WREG32(CB_HW_CONTROL, tmp);
3412
3413         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3414
3415         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3416                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3417                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3418                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3419
3420         WREG32(VGT_NUM_INSTANCES, 1);
3421
3422         WREG32(CP_PERFMON_CNTL, 0);
3423
3424         WREG32(SQ_CONFIG, 0);
3425
3426         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3427                                           FORCE_EOV_MAX_REZ_CNT(255)));
3428
3429         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3430                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3431
3432         WREG32(VGT_GS_VERTEX_REUSE, 16);
3433         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3434
3435         tmp = RREG32(HDP_MISC_CNTL);
3436         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3437         WREG32(HDP_MISC_CNTL, tmp);
3438
3439         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3440         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3441
3442         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3443         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3444         mutex_unlock(&rdev->grbm_idx_mutex);
3445
3446         udelay(50);
3447 }
3448
3449 /*
3450  * GPU scratch registers helpers function.
3451  */
3452 /**
3453  * cik_scratch_init - setup driver info for CP scratch regs
3454  *
3455  * @rdev: radeon_device pointer
3456  *
3457  * Set up the number and offset of the CP scratch registers.
3458  * NOTE: use of CP scratch registers is a legacy inferface and
3459  * is not used by default on newer asics (r6xx+).  On newer asics,
3460  * memory buffers are used for fences rather than scratch regs.
3461  */
3462 static void cik_scratch_init(struct radeon_device *rdev)
3463 {
3464         int i;
3465
3466         rdev->scratch.num_reg = 7;
3467         rdev->scratch.reg_base = SCRATCH_REG0;
3468         for (i = 0; i < rdev->scratch.num_reg; i++) {
3469                 rdev->scratch.free[i] = true;
3470                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3471         }
3472 }
3473
3474 /**
3475  * cik_ring_test - basic gfx ring test
3476  *
3477  * @rdev: radeon_device pointer
3478  * @ring: radeon_ring structure holding ring information
3479  *
3480  * Allocate a scratch register and write to it using the gfx ring (CIK).
3481  * Provides a basic gfx ring test to verify that the ring is working.
3482  * Used by cik_cp_gfx_resume();
3483  * Returns 0 on success, error on failure.
3484  */
3485 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3486 {
3487         uint32_t scratch;
3488         uint32_t tmp = 0;
3489         unsigned i;
3490         int r;
3491
3492         r = radeon_scratch_get(rdev, &scratch);
3493         if (r) {
3494                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3495                 return r;
3496         }
3497         WREG32(scratch, 0xCAFEDEAD);
3498         r = radeon_ring_lock(rdev, ring, 3);
3499         if (r) {
3500                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3501                 radeon_scratch_free(rdev, scratch);
3502                 return r;
3503         }
3504         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3505         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3506         radeon_ring_write(ring, 0xDEADBEEF);
3507         radeon_ring_unlock_commit(rdev, ring, false);
3508
3509         for (i = 0; i < rdev->usec_timeout; i++) {
3510                 tmp = RREG32(scratch);
3511                 if (tmp == 0xDEADBEEF)
3512                         break;
3513                 DRM_UDELAY(1);
3514         }
3515         if (i < rdev->usec_timeout) {
3516                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3517         } else {
3518                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3519                           ring->idx, scratch, tmp);
3520                 r = -EINVAL;
3521         }
3522         radeon_scratch_free(rdev, scratch);
3523         return r;
3524 }
3525
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.  The flush is performed as a
 * WAIT_REG_MEM write/wait/write cycle against the per-client
 * GPU_HDP_FLUSH_REQ/DONE handshake registers.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
                                       int ridx)
{
        struct radeon_ring *ring = &rdev->ring[ridx];
        u32 ref_and_mask;

        /* pick the REQ/DONE bit for this ring's HDP client */
        switch (ring->idx) {
        case CAYMAN_RING_TYPE_CP1_INDEX:
        case CAYMAN_RING_TYPE_CP2_INDEX:
        default:
                /* compute rings: one bit per pipe, base differs per MEC */
                switch (ring->me) {
                case 0:
                        ref_and_mask = CP2 << ring->pipe;
                        break;
                case 1:
                        ref_and_mask = CP6 << ring->pipe;
                        break;
                default:
                        /* unknown ME: nothing sensible to flush */
                        return;
                }
                break;
        case RADEON_RING_TYPE_GFX_INDEX:
                ref_and_mask = CP0;
                break;
        }

        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
        radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
        radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
        radeon_ring_write(ring, ref_and_mask); /* reference value */
        radeon_ring_write(ring, ref_and_mask); /* compare mask */
        radeon_ring_write(ring, 0x20); /* poll interval */
}
3570
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
                             struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* Workaround for cache flush problems. First send a dummy EOP
         * event down the pipe with seq one below.
         */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* dummy write: 32-bit data, no interrupt (INT_SEL(0)) */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                                DATA_SEL(1) | INT_SEL(0));
        radeon_ring_write(ring, fence->seq - 1);
        radeon_ring_write(ring, 0);

        /* Then send the real EOP event down the pipe. */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* real write: 32-bit data, interrupt on write confirm (INT_SEL(2)) */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
3611
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
                                 struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* RELEASE_MEM - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* 32-bit data write, interrupt on write confirm */
        radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(addr));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
3639
3640 /**
3641  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3642  *
3643  * @rdev: radeon_device pointer
3644  * @ring: radeon ring buffer object
3645  * @semaphore: radeon semaphore object
3646  * @emit_wait: Is this a sempahore wait?
3647  *
3648  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3649  * from running ahead of semaphore waits.
3650  */
3651 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3652                              struct radeon_ring *ring,
3653                              struct radeon_semaphore *semaphore,
3654                              bool emit_wait)
3655 {
3656         uint64_t addr = semaphore->gpu_addr;
3657         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3658
3659         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3660         radeon_ring_write(ring, lower_32_bits(addr));
3661         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3662
3663         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3664                 /* Prevent the PFP from running ahead of the semaphore wait */
3665                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3666                 radeon_ring_write(ring, 0x0);
3667         }
3668
3669         return true;
3670 }
3671
3672 /**
3673  * cik_copy_cpdma - copy pages using the CP DMA engine
3674  *
3675  * @rdev: radeon_device pointer
3676  * @src_offset: src GPU address
3677  * @dst_offset: dst GPU address
3678  * @num_gpu_pages: number of GPU pages to xfer
3679  * @resv: reservation object to sync to
3680  *
3681  * Copy GPU paging using the CP DMA engine (CIK+).
3682  * Used by the radeon ttm implementation to move pages if
3683  * registered as the asic copy callback.
3684  */
3685 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3686                                     uint64_t src_offset, uint64_t dst_offset,
3687                                     unsigned num_gpu_pages,
3688                                     struct reservation_object *resv)
3689 {
3690         struct radeon_fence *fence;
3691         struct radeon_sync sync;
3692         int ring_index = rdev->asic->copy.blit_ring_index;
3693         struct radeon_ring *ring = &rdev->ring[ring_index];
3694         u32 size_in_bytes, cur_size_in_bytes, control;
3695         int i, num_loops;
3696         int r = 0;
3697
3698         radeon_sync_create(&sync);
3699
3700         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3701         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3702         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3703         if (r) {
3704                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3705                 radeon_sync_free(rdev, &sync, NULL);
3706                 return ERR_PTR(r);
3707         }
3708
3709         radeon_sync_resv(rdev, &sync, resv, false);
3710         radeon_sync_rings(rdev, &sync, ring->idx);
3711
3712         for (i = 0; i < num_loops; i++) {
3713                 cur_size_in_bytes = size_in_bytes;
3714                 if (cur_size_in_bytes > 0x1fffff)
3715                         cur_size_in_bytes = 0x1fffff;
3716                 size_in_bytes -= cur_size_in_bytes;
3717                 control = 0;
3718                 if (size_in_bytes == 0)
3719                         control |= PACKET3_DMA_DATA_CP_SYNC;
3720                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3721                 radeon_ring_write(ring, control);
3722                 radeon_ring_write(ring, lower_32_bits(src_offset));
3723                 radeon_ring_write(ring, upper_32_bits(src_offset));
3724                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3725                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3726                 radeon_ring_write(ring, cur_size_in_bytes);
3727                 src_offset += cur_size_in_bytes;
3728                 dst_offset += cur_size_in_bytes;
3729         }
3730
3731         r = radeon_fence_emit(rdev, &fence, ring->idx);
3732         if (r) {
3733                 radeon_ring_unlock_undo(rdev, ring);
3734                 radeon_sync_free(rdev, &sync, NULL);
3735                 return ERR_PTR(r);
3736         }
3737
3738         radeon_ring_unlock_commit(rdev, ring, false);
3739         radeon_sync_free(rdev, &sync, fence);
3740
3741         return fence;
3742 }
3743
3744 /*
3745  * IB stuff
3746  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 header, control = INDIRECT_BUFFER_VALID;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
                        /* 3 dwords for this write + 4 for the IB packet below */
                        next_rptr = ring->wptr + 3 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* 5 dwords for this write + 4 for the IB packet below */
                        next_rptr = ring->wptr + 5 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        /* IB length in dwords plus the VMID to execute under */
        control |= ib->length_dw | (vm_id << 24);

        radeon_ring_write(ring, header);
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, control);
}
3798
3799 /**
3800  * cik_ib_test - basic gfx ring IB test
3801  *
3802  * @rdev: radeon_device pointer
3803  * @ring: radeon_ring structure holding ring information
3804  *
3805  * Allocate an IB and execute it on the gfx ring (CIK).
3806  * Provides a basic gfx ring test to verify that IBs are working.
3807  * Returns 0 on success, error on failure.
3808  */
3809 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3810 {
3811         struct radeon_ib ib;
3812         uint32_t scratch;
3813         uint32_t tmp = 0;
3814         unsigned i;
3815         int r;
3816
3817         r = radeon_scratch_get(rdev, &scratch);
3818         if (r) {
3819                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3820                 return r;
3821         }
3822         WREG32(scratch, 0xCAFEDEAD);
3823         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3824         if (r) {
3825                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3826                 radeon_scratch_free(rdev, scratch);
3827                 return r;
3828         }
3829         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3830         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3831         ib.ptr[2] = 0xDEADBEEF;
3832         ib.length_dw = 3;
3833         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3834         if (r) {
3835                 radeon_scratch_free(rdev, scratch);
3836                 radeon_ib_free(rdev, &ib);
3837                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3838                 return r;
3839         }
3840         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3841                 RADEON_USEC_IB_TEST_TIMEOUT));
3842         if (r < 0) {
3843                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3844                 radeon_scratch_free(rdev, scratch);
3845                 radeon_ib_free(rdev, &ib);
3846                 return r;
3847         } else if (r == 0) {
3848                 DRM_ERROR("radeon: fence wait timed out.\n");
3849                 radeon_scratch_free(rdev, scratch);
3850                 radeon_ib_free(rdev, &ib);
3851                 return -ETIMEDOUT;
3852         }
3853         r = 0;
3854         for (i = 0; i < rdev->usec_timeout; i++) {
3855                 tmp = RREG32(scratch);
3856                 if (tmp == 0xDEADBEEF)
3857                         break;
3858                 DRM_UDELAY(1);
3859         }
3860         if (i < rdev->usec_timeout) {
3861                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3862         } else {
3863                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3864                           scratch, tmp);
3865                 r = -EINVAL;
3866         }
3867         radeon_scratch_free(rdev, scratch);
3868         radeon_ib_free(rdev, &ib);
3869         return r;
3870 }
3871
3872 /*
3873  * CP.
 * On CIK, gfx and compute now have independent command processors.
3875  *
3876  * GFX
3877  * Gfx consists of a single ring and can process both gfx jobs and
3878  * compute jobs.  The gfx CP consists of three microengines (ME):
3879  * PFP - Pre-Fetch Parser
3880  * ME - Micro Engine
3881  * CE - Constant Engine
3882  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3884  * used by the DE so that they can be loaded into cache in parallel
3885  * while the DE is processing state update packets.
3886  *
3887  * Compute
3888  * The compute CP consists of two microengines (ME):
3889  * MEC1 - Compute MicroEngine 1
3890  * MEC2 - Compute MicroEngine 2
3891  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3892  * The queues are exposed to userspace and are programmed directly
3893  * by the compute runtime.
3894  */
3895 /**
3896  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3897  *
3898  * @rdev: radeon_device pointer
3899  * @enable: enable or disable the MEs
3900  *
3901  * Halts or unhalts the gfx MEs.
3902  */
3903 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3904 {
3905         if (enable)
3906                 WREG32(CP_ME_CNTL, 0);
3907         else {
3908                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3909                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3910                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3911                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3912         }
3913         udelay(50);
3914 }
3915
3916 /**
3917  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3918  *
3919  * @rdev: radeon_device pointer
3920  *
3921  * Loads the gfx PFP, ME, and CE ucode.
3922  * Returns 0 for success, -EINVAL if the ucode is not available.
3923  */
3924 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3925 {
3926         int i;
3927
3928         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3929                 return -EINVAL;
3930
3931         cik_cp_gfx_enable(rdev, false);
3932
3933         if (rdev->new_fw) {
3934                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3935                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3936                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3937                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3938                 const struct gfx_firmware_header_v1_0 *me_hdr =
3939                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3940                 const __le32 *fw_data;
3941                 u32 fw_size;
3942
3943                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3944                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3945                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3946
3947                 /* PFP */
3948                 fw_data = (const __le32 *)
3949                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3950                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3951                 WREG32(CP_PFP_UCODE_ADDR, 0);
3952                 for (i = 0; i < fw_size; i++)
3953                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3954                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3955
3956                 /* CE */
3957                 fw_data = (const __le32 *)
3958                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3959                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3960                 WREG32(CP_CE_UCODE_ADDR, 0);
3961                 for (i = 0; i < fw_size; i++)
3962                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3963                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3964
3965                 /* ME */
3966                 fw_data = (const __be32 *)
3967                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3968                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3969                 WREG32(CP_ME_RAM_WADDR, 0);
3970                 for (i = 0; i < fw_size; i++)
3971                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3972                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3973                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3974         } else {
3975                 const __be32 *fw_data;
3976
3977                 /* PFP */
3978                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3979                 WREG32(CP_PFP_UCODE_ADDR, 0);
3980                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3981                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3982                 WREG32(CP_PFP_UCODE_ADDR, 0);
3983
3984                 /* CE */
3985                 fw_data = (const __be32 *)rdev->ce_fw->data;
3986                 WREG32(CP_CE_UCODE_ADDR, 0);
3987                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3988                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3989                 WREG32(CP_CE_UCODE_ADDR, 0);
3990
3991                 /* ME */
3992                 fw_data = (const __be32 *)rdev->me_fw->data;
3993                 WREG32(CP_ME_RAM_WADDR, 0);
3994                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3995                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3996                 WREG32(CP_ME_RAM_WADDR, 0);
3997         }
3998
3999         return 0;
4000 }
4001
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        /* init the CP */
        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
        WREG32(CP_ENDIAN_SWAP, 0);
        WREG32(CP_DEVICE_ID, 1);

        cik_cp_gfx_enable(rdev, true);

        /* cik_default_size dwords of clear state + 17 dwords of setup packets */
        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* init the CE partitions.  CE only used for gfx on CIK */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        radeon_ring_write(ring, 0x8000);
        radeon_ring_write(ring, 0x8000);

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        radeon_ring_write(ring, 0x80000000);
        radeon_ring_write(ring, 0x80000000);

        /* stream the golden clear-state register values */
        for (i = 0; i < cik_default_size; i++)
                radeon_ring_write(ring, cik_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

        radeon_ring_unlock_commit(rdev, ring, false);

        return 0;
}
4062
4063 /**
4064  * cik_cp_gfx_fini - stop the gfx ring
4065  *
4066  * @rdev: radeon_device pointer
4067  *
4068  * Stop the gfx ring and tear down the driver ring
4069  * info.
4070  */
4071 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4072 {
4073         cik_cp_gfx_enable(rdev, false);
4074         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4075 }
4076
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr;
        int r;

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        if (rdev->family != CHIP_HAWAII)
                WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(CP_RB_VMID, 0);

        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        /* scratch register shadowing is no longer supported */
        WREG32(SCRATCH_UMSK, 0);

        if (!rdev->wb.enabled)
                tmp |= RB_NO_UPDATE;

        /* let the RPTR write-enable settle before clearing it */
        mdelay(1);
        WREG32(CP_RB0_CNTL, tmp);

        /* ring buffer base address, 256-byte aligned */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(CP_RB0_BASE, rb_addr);
        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

        /* start the ring */
        cik_cp_gfx_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                return r;
        }

        /* gfx CP is up: expose the full VRAM size to TTM again */
        if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
4152
4153 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4154                      struct radeon_ring *ring)
4155 {
4156         u32 rptr;
4157
4158         if (rdev->wb.enabled)
4159                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4160         else
4161                 rptr = RREG32(CP_RB0_RPTR);
4162
4163         return rptr;
4164 }
4165
4166 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4167                      struct radeon_ring *ring)
4168 {
4169         return RREG32(CP_RB0_WPTR);
4170 }
4171
4172 void cik_gfx_set_wptr(struct radeon_device *rdev,
4173                       struct radeon_ring *ring)
4174 {
4175         WREG32(CP_RB0_WPTR, ring->wptr);
4176         (void)RREG32(CP_RB0_WPTR);
4177 }
4178
4179 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4180                          struct radeon_ring *ring)
4181 {
4182         u32 rptr;
4183
4184         if (rdev->wb.enabled) {
4185                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4186         } else {
4187                 mutex_lock(&rdev->srbm_mutex);
4188                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4189                 rptr = RREG32(CP_HQD_PQ_RPTR);
4190                 cik_srbm_select(rdev, 0, 0, 0, 0);
4191                 mutex_unlock(&rdev->srbm_mutex);
4192         }
4193
4194         return rptr;
4195 }
4196
4197 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4198                          struct radeon_ring *ring)
4199 {
4200         u32 wptr;
4201
4202         if (rdev->wb.enabled) {
4203                 /* XXX check if swapping is necessary on BE */
4204                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4205         } else {
4206                 mutex_lock(&rdev->srbm_mutex);
4207                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4208                 wptr = RREG32(CP_HQD_PQ_WPTR);
4209                 cik_srbm_select(rdev, 0, 0, 0, 0);
4210                 mutex_unlock(&rdev->srbm_mutex);
4211         }
4212
4213         return wptr;
4214 }
4215
4216 void cik_compute_set_wptr(struct radeon_device *rdev,
4217                           struct radeon_ring *ring)
4218 {
4219         /* XXX check if swapping is necessary on BE */
4220         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4221         WDOORBELL32(ring->doorbell_index, ring->wptr);
4222 }
4223
/* Quiesce one compute ring's hardware queue descriptor (HQD).
 * NOTE(review): the only visible caller (cik_cp_compute_enable) holds
 * srbm_mutex around these calls — this helper itself does not lock.
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        /* point SRBM at this ring's me/pipe/queue */
        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                /* request dequeue and wait (bounded) for the HQD to go idle */
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore default SRBM selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4248
4249 /**
4250  * cik_cp_compute_enable - enable/disable the compute CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the compute MEs.
4256  */
4257 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4258 {
4259         if (enable)
4260                 WREG32(CP_MEC_CNTL, 0);
4261         else {
4262                 /*
4263                  * To make hibernation reliable we need to clear compute ring
4264                  * configuration before halting the compute ring.
4265                  */
4266                 mutex_lock(&rdev->srbm_mutex);
4267                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4268                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4269                 mutex_unlock(&rdev->srbm_mutex);
4270
4271                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4272                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4273                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4274         }
4275         udelay(50);
4276 }
4277
4278 /**
4279  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4280  *
4281  * @rdev: radeon_device pointer
4282  *
4283  * Loads the compute MEC1&2 ucode.
4284  * Returns 0 for success, -EINVAL if the ucode is not available.
4285  */
4286 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4287 {
4288         int i;
4289
4290         if (!rdev->mec_fw)
4291                 return -EINVAL;
4292
4293         cik_cp_compute_enable(rdev, false);
4294
4295         if (rdev->new_fw) {
4296                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4297                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4298                 const __le32 *fw_data;
4299                 u32 fw_size;
4300
4301                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4302
4303                 /* MEC1 */
4304                 fw_data = (const __le32 *)
4305                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4306                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4307                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4308                 for (i = 0; i < fw_size; i++)
4309                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4310                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4311
4312                 /* MEC2 */
4313                 if (rdev->family == CHIP_KAVERI) {
4314                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4315                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4316
4317                         fw_data = (const __le32 *)
4318                                 (rdev->mec2_fw->data +
4319                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4320                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4321                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4322                         for (i = 0; i < fw_size; i++)
4323                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4324                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4325                 }
4326         } else {
4327                 const __be32 *fw_data;
4328
4329                 /* MEC1 */
4330                 fw_data = (const __be32 *)rdev->mec_fw->data;
4331                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4332                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4333                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4334                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335
4336                 if (rdev->family == CHIP_KAVERI) {
4337                         /* MEC2 */
4338                         fw_data = (const __be32 *)rdev->mec_fw->data;
4339                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4340                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4341                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4342                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4343                 }
4344         }
4345
4346         return 0;
4347 }
4348
4349 /**
4350  * cik_cp_compute_start - start the compute queues
4351  *
4352  * @rdev: radeon_device pointer
4353  *
4354  * Enable the compute queues.
4355  * Returns 0 for success, error for failure.
4356  */
4357 static int cik_cp_compute_start(struct radeon_device *rdev)
4358 {
4359         cik_cp_compute_enable(rdev, true);
4360
4361         return 0;
4362 }
4363
4364 /**
4365  * cik_cp_compute_fini - stop the compute queues
4366  *
4367  * @rdev: radeon_device pointer
4368  *
4369  * Stop the compute queues and tear down the driver queue
4370  * info.
4371  */
4372 static void cik_cp_compute_fini(struct radeon_device *rdev)
4373 {
4374         int i, idx, r;
4375
4376         cik_cp_compute_enable(rdev, false);
4377
4378         for (i = 0; i < 2; i++) {
4379                 if (i == 0)
4380                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4381                 else
4382                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4383
4384                 if (rdev->ring[idx].mqd_obj) {
4385                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4386                         if (unlikely(r != 0))
4387                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4388
4389                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4390                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4391
4392                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4393                         rdev->ring[idx].mqd_obj = NULL;
4394                 }
4395         }
4396 }
4397
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init():
 * unpin, unreserve, drop the reference, and clear the pointer.
 */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* best-effort: continue teardown even if reserve failed */
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4413
4414 #define MEC_HPD_SIZE 2048
4415
/* Allocate, pin, and zero the HPD EOP buffer used by the MEC compute
 * pipes.  One MEC_HPD_SIZE*2 region is carved out per pipe (see the
 * eop_gpu_addr computation in cik_cp_compute_resume()).
 * Returns 0 on success, a negative error code otherwise; on failure
 * any partially set up state is undone via cik_mec_fini().
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	/* map the buffer into the CPU's address space so it can be cleared */
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4470
/* CPU-side shadow of the CP hardware queue descriptor (HQD) register
 * state.  Embedded in struct bonaire_mqd as queue_state; the fields
 * mirror the CP_HQD_*/CP_MQD_* registers programmed in
 * cik_cp_compute_resume().
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4509
/* Memory queue descriptor (MQD) for CIK/Bonaire compute queues.  One
 * instance lives in a GTT buffer object per compute ring (see
 * cik_cp_compute_resume()); queue_state holds the HQD register shadow.
 * NOTE(review): field layout presumably must match the hardware/ucode
 * MQD format exactly — do not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4537
4538 /**
4539  * cik_cp_compute_resume - setup the compute queue registers
4540  *
4541  * @rdev: radeon_device pointer
4542  *
4543  * Program the compute queues and test them to make sure they
4544  * are working.
4545  * Returns 0 for success, error for failure.
4546  */
4547 static int cik_cp_compute_resume(struct radeon_device *rdev)
4548 {
4549         int r, i, j, idx;
4550         u32 tmp;
4551         bool use_doorbell = true;
4552         u64 hqd_gpu_addr;
4553         u64 mqd_gpu_addr;
4554         u64 eop_gpu_addr;
4555         u64 wb_gpu_addr;
4556         u32 *buf;
4557         struct bonaire_mqd *mqd;
4558
4559         r = cik_cp_compute_start(rdev);
4560         if (r)
4561                 return r;
4562
4563         /* fix up chicken bits */
4564         tmp = RREG32(CP_CPF_DEBUG);
4565         tmp |= (1 << 23);
4566         WREG32(CP_CPF_DEBUG, tmp);
4567
4568         /* init the pipes */
4569         mutex_lock(&rdev->srbm_mutex);
4570
4571         for (i = 0; i < rdev->mec.num_pipe; ++i) {
4572                 cik_srbm_select(rdev, 0, i, 0, 0);
4573
4574                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4575                 /* write the EOP addr */
4576                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4577                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4578
4579                 /* set the VMID assigned */
4580                 WREG32(CP_HPD_EOP_VMID, 0);
4581
4582                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4583                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4584                 tmp &= ~EOP_SIZE_MASK;
4585                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4586                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4587
4588         }
4589         mutex_unlock(&rdev->srbm_mutex);
4590
4591         /* init the queues.  Just two for now. */
4592         for (i = 0; i < 2; i++) {
4593                 if (i == 0)
4594                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4595                 else
4596                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4597
4598                 if (rdev->ring[idx].mqd_obj == NULL) {
4599                         r = radeon_bo_create(rdev,
4600                                              sizeof(struct bonaire_mqd),
4601                                              PAGE_SIZE, true,
4602                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4603                                              NULL, &rdev->ring[idx].mqd_obj);
4604                         if (r) {
4605                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4606                                 return r;
4607                         }
4608                 }
4609
4610                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4611                 if (unlikely(r != 0)) {
4612                         cik_cp_compute_fini(rdev);
4613                         return r;
4614                 }
4615                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4616                                   &mqd_gpu_addr);
4617                 if (r) {
4618                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4619                         cik_cp_compute_fini(rdev);
4620                         return r;
4621                 }
4622                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4623                 if (r) {
4624                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4625                         cik_cp_compute_fini(rdev);
4626                         return r;
4627                 }
4628
4629                 /* init the mqd struct */
4630                 memset(buf, 0, sizeof(struct bonaire_mqd));
4631
4632                 mqd = (struct bonaire_mqd *)buf;
4633                 mqd->header = 0xC0310800;
4634                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4635                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4636                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4637                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4638
4639                 mutex_lock(&rdev->srbm_mutex);
4640                 cik_srbm_select(rdev, rdev->ring[idx].me,
4641                                 rdev->ring[idx].pipe,
4642                                 rdev->ring[idx].queue, 0);
4643
4644                 /* disable wptr polling */
4645                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4646                 tmp &= ~WPTR_POLL_EN;
4647                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4648
4649                 /* enable doorbell? */
4650                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4651                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4652                 if (use_doorbell)
4653                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4654                 else
4655                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4656                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4657                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4658
4659                 /* disable the queue if it's active */
4660                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4661                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4662                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4663                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4664                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4665                         for (j = 0; j < rdev->usec_timeout; j++) {
4666                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4667                                         break;
4668                                 udelay(1);
4669                         }
4670                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4671                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4672                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4673                 }
4674
4675                 /* set the pointer to the MQD */
4676                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4677                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4678                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4679                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4680                 /* set MQD vmid to 0 */
4681                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4682                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4683                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4684
4685                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4686                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4687                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4688                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4689                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4690                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4691
4692                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4693                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4694                 mqd->queue_state.cp_hqd_pq_control &=
4695                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4696
4697                 mqd->queue_state.cp_hqd_pq_control |=
4698                         order_base_2(rdev->ring[idx].ring_size / 8);
4699                 mqd->queue_state.cp_hqd_pq_control |=
4700                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4701 #ifdef __BIG_ENDIAN
4702                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4703 #endif
4704                 mqd->queue_state.cp_hqd_pq_control &=
4705                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4706                 mqd->queue_state.cp_hqd_pq_control |=
4707                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4708                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4709
4710                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4711                 if (i == 0)
4712                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4713                 else
4714                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4715                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4716                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4717                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4718                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4719                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4720
4721                 /* set the wb address wether it's enabled or not */
4722                 if (i == 0)
4723                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4724                 else
4725                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4726                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4727                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4728                         upper_32_bits(wb_gpu_addr) & 0xffff;
4729                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4730                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4731                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4732                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4733
4734                 /* enable the doorbell if requested */
4735                 if (use_doorbell) {
4736                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4737                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4738                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4739                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4740                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4741                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4742                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4743                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4744
4745                 } else {
4746                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4747                 }
4748                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4749                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4750
4751                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4752                 rdev->ring[idx].wptr = 0;
4753                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4754                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4755                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4756
4757                 /* set the vmid for the queue */
4758                 mqd->queue_state.cp_hqd_vmid = 0;
4759                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4760
4761                 /* activate the queue */
4762                 mqd->queue_state.cp_hqd_active = 1;
4763                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4764
4765                 cik_srbm_select(rdev, 0, 0, 0, 0);
4766                 mutex_unlock(&rdev->srbm_mutex);
4767
4768                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4769                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4770
4771                 rdev->ring[idx].ready = true;
4772                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4773                 if (r)
4774                         rdev->ring[idx].ready = false;
4775         }
4776
4777         return 0;
4778 }
4779
/* Enable/disable both command processors: the gfx CP first, then the
 * compute (MEC) CP. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4785
/* Load the gfx CP ucode, then the compute MEC ucode.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
4799
/* Tear down both command processors: gfx CP first, then compute CP. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4805
4806 static int cik_cp_resume(struct radeon_device *rdev)
4807 {
4808         int r;
4809
4810         cik_enable_gui_idle_interrupt(rdev, false);
4811
4812         r = cik_cp_load_microcode(rdev);
4813         if (r)
4814                 return r;
4815
4816         r = cik_cp_gfx_resume(rdev);
4817         if (r)
4818                 return r;
4819         r = cik_cp_compute_resume(rdev);
4820         if (r)
4821                 return r;
4822
4823         cik_enable_gui_idle_interrupt(rdev, true);
4824
4825         return 0;
4826 }
4827
/* Dump the GPU busy/stall status registers (GRBM, SRBM, SDMA, CP) to
 * the kernel log; used by the soft-reset path for post-mortem debug.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4867
4868 /**
4869  * cik_gpu_check_soft_reset - check which blocks are busy
4870  *
4871  * @rdev: radeon_device pointer
4872  *
4873  * Check which blocks are busy and return the relevant reset
4874  * mask to be used by cik_gpu_soft_reset().
4875  * Returns a mask of the blocks to be reset.
4876  */
4877 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4878 {
4879         u32 reset_mask = 0;
4880         u32 tmp;
4881
4882         /* GRBM_STATUS */
4883         tmp = RREG32(GRBM_STATUS);
4884         if (tmp & (PA_BUSY | SC_BUSY |
4885                    BCI_BUSY | SX_BUSY |
4886                    TA_BUSY | VGT_BUSY |
4887                    DB_BUSY | CB_BUSY |
4888                    GDS_BUSY | SPI_BUSY |
4889                    IA_BUSY | IA_BUSY_NO_DMA))
4890                 reset_mask |= RADEON_RESET_GFX;
4891
4892         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4893                 reset_mask |= RADEON_RESET_CP;
4894
4895         /* GRBM_STATUS2 */
4896         tmp = RREG32(GRBM_STATUS2);
4897         if (tmp & RLC_BUSY)
4898                 reset_mask |= RADEON_RESET_RLC;
4899
4900         /* SDMA0_STATUS_REG */
4901         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4902         if (!(tmp & SDMA_IDLE))
4903                 reset_mask |= RADEON_RESET_DMA;
4904
4905         /* SDMA1_STATUS_REG */
4906         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4907         if (!(tmp & SDMA_IDLE))
4908                 reset_mask |= RADEON_RESET_DMA1;
4909
4910         /* SRBM_STATUS2 */
4911         tmp = RREG32(SRBM_STATUS2);
4912         if (tmp & SDMA_BUSY)
4913                 reset_mask |= RADEON_RESET_DMA;
4914
4915         if (tmp & SDMA1_BUSY)
4916                 reset_mask |= RADEON_RESET_DMA1;
4917
4918         /* SRBM_STATUS */
4919         tmp = RREG32(SRBM_STATUS);
4920
4921         if (tmp & IH_BUSY)
4922                 reset_mask |= RADEON_RESET_IH;
4923
4924         if (tmp & SEM_BUSY)
4925                 reset_mask |= RADEON_RESET_SEM;
4926
4927         if (tmp & GRBM_RQ_PENDING)
4928                 reset_mask |= RADEON_RESET_GRBM;
4929
4930         if (tmp & VMC_BUSY)
4931                 reset_mask |= RADEON_RESET_VMC;
4932
4933         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4934                    MCC_BUSY | MCD_BUSY))
4935                 reset_mask |= RADEON_RESET_MC;
4936
4937         if (evergreen_is_display_hung(rdev))
4938                 reset_mask |= RADEON_RESET_DISPLAY;
4939
4940         /* Skip MC reset as it's mostly likely not hung, just busy */
4941         if (reset_mask & RADEON_RESET_MC) {
4942                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4943                 reset_mask &= ~RADEON_RESET_MC;
4944         }
4945
4946         return reset_mask;
4947 }
4948
4949 /**
4950  * cik_gpu_soft_reset - soft reset GPU
4951  *
4952  * @rdev: radeon_device pointer
4953  * @reset_mask: mask of which blocks to reset
4954  *
4955  * Soft reset the blocks specified in @reset_mask.
4956  */
4957 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4958 {
4959         struct evergreen_mc_save save;
4960         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4961         u32 tmp;
4962
4963         if (reset_mask == 0)
4964                 return;
4965
4966         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4967
4968         cik_print_gpu_status_regs(rdev);
4969         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4970                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4971         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4972                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4973
4974         /* disable CG/PG */
4975         cik_fini_pg(rdev);
4976         cik_fini_cg(rdev);
4977
4978         /* stop the rlc */
4979         cik_rlc_stop(rdev);
4980
4981         /* Disable GFX parsing/prefetching */
4982         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4983
4984         /* Disable MEC parsing/prefetching */
4985         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4986
4987         if (reset_mask & RADEON_RESET_DMA) {
4988                 /* sdma0 */
4989                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4990                 tmp |= SDMA_HALT;
4991                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4992         }
4993         if (reset_mask & RADEON_RESET_DMA1) {
4994                 /* sdma1 */
4995                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4996                 tmp |= SDMA_HALT;
4997                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4998         }
4999
5000         evergreen_mc_stop(rdev, &save);
5001         if (evergreen_mc_wait_for_idle(rdev)) {
5002                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5003         }
5004
5005         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5006                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5007
5008         if (reset_mask & RADEON_RESET_CP) {
5009                 grbm_soft_reset |= SOFT_RESET_CP;
5010
5011                 srbm_soft_reset |= SOFT_RESET_GRBM;
5012         }
5013
5014         if (reset_mask & RADEON_RESET_DMA)
5015                 srbm_soft_reset |= SOFT_RESET_SDMA;
5016
5017         if (reset_mask & RADEON_RESET_DMA1)
5018                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5019
5020         if (reset_mask & RADEON_RESET_DISPLAY)
5021                 srbm_soft_reset |= SOFT_RESET_DC;
5022
5023         if (reset_mask & RADEON_RESET_RLC)
5024                 grbm_soft_reset |= SOFT_RESET_RLC;
5025
5026         if (reset_mask & RADEON_RESET_SEM)
5027                 srbm_soft_reset |= SOFT_RESET_SEM;
5028
5029         if (reset_mask & RADEON_RESET_IH)
5030                 srbm_soft_reset |= SOFT_RESET_IH;
5031
5032         if (reset_mask & RADEON_RESET_GRBM)
5033                 srbm_soft_reset |= SOFT_RESET_GRBM;
5034
5035         if (reset_mask & RADEON_RESET_VMC)
5036                 srbm_soft_reset |= SOFT_RESET_VMC;
5037
5038         if (!(rdev->flags & RADEON_IS_IGP)) {
5039                 if (reset_mask & RADEON_RESET_MC)
5040                         srbm_soft_reset |= SOFT_RESET_MC;
5041         }
5042
5043         if (grbm_soft_reset) {
5044                 tmp = RREG32(GRBM_SOFT_RESET);
5045                 tmp |= grbm_soft_reset;
5046                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5047                 WREG32(GRBM_SOFT_RESET, tmp);
5048                 tmp = RREG32(GRBM_SOFT_RESET);
5049
5050                 udelay(50);
5051
5052                 tmp &= ~grbm_soft_reset;
5053                 WREG32(GRBM_SOFT_RESET, tmp);
5054                 tmp = RREG32(GRBM_SOFT_RESET);
5055         }
5056
5057         if (srbm_soft_reset) {
5058                 tmp = RREG32(SRBM_SOFT_RESET);
5059                 tmp |= srbm_soft_reset;
5060                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5061                 WREG32(SRBM_SOFT_RESET, tmp);
5062                 tmp = RREG32(SRBM_SOFT_RESET);
5063
5064                 udelay(50);
5065
5066                 tmp &= ~srbm_soft_reset;
5067                 WREG32(SRBM_SOFT_RESET, tmp);
5068                 tmp = RREG32(SRBM_SOFT_RESET);
5069         }
5070
5071         /* Wait a little for things to settle down */
5072         udelay(50);
5073
5074         evergreen_mc_resume(rdev, &save);
5075         udelay(50);
5076
5077         cik_print_gpu_status_regs(rdev);
5078 }
5079
/* GMCON register state preserved across a KV (APU) pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* saved GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* saved GMCON_MISC */
	u32 gmcon_misc3;	/* saved GMCON_MISC3 */
};
5085
/* Save GMCON state before a KV reset and keep the reng engine from
 * executing on power up or register update while the reset is in flight.
 * The saved values are written back by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* snapshot the current values so they can be restored after reset */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* mask out the execute-on-power-up / execute-on-reg-update triggers
	 * (and stutter mode) for the duration of the reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5097
5098 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5099                                       struct kv_reset_save_regs *save)
5100 {
5101         int i;
5102
5103         WREG32(GMCON_PGFSM_WRITE, 0);
5104         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5105
5106         for (i = 0; i < 5; i++)
5107                 WREG32(GMCON_PGFSM_WRITE, 0);
5108
5109         WREG32(GMCON_PGFSM_WRITE, 0);
5110         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5111
5112         for (i = 0; i < 5; i++)
5113                 WREG32(GMCON_PGFSM_WRITE, 0);
5114
5115         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5116         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5117
5118         for (i = 0; i < 5; i++)
5119                 WREG32(GMCON_PGFSM_WRITE, 0);
5120
5121         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5122         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5123
5124         for (i = 0; i < 5; i++)
5125                 WREG32(GMCON_PGFSM_WRITE, 0);
5126
5127         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5128         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5129
5130         for (i = 0; i < 5; i++)
5131                 WREG32(GMCON_PGFSM_WRITE, 0);
5132
5133         WREG32(GMCON_PGFSM_WRITE, 0);
5134         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5135
5136         for (i = 0; i < 5; i++)
5137                 WREG32(GMCON_PGFSM_WRITE, 0);
5138
5139         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5140         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5141
5142         for (i = 0; i < 5; i++)
5143                 WREG32(GMCON_PGFSM_WRITE, 0);
5144
5145         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5146         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5147
5148         for (i = 0; i < 5; i++)
5149                 WREG32(GMCON_PGFSM_WRITE, 0);
5150
5151         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5152         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5153
5154         for (i = 0; i < 5; i++)
5155                 WREG32(GMCON_PGFSM_WRITE, 0);
5156
5157         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5158         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5159
5160         for (i = 0; i < 5; i++)
5161                 WREG32(GMCON_PGFSM_WRITE, 0);
5162
5163         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5164         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5165
5166         WREG32(GMCON_MISC3, save->gmcon_misc3);
5167         WREG32(GMCON_MISC, save->gmcon_misc);
5168         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5169 }
5170
/* Hard reset the asic via pci config space.
 *
 * Quiesces the engines (CP, MEC, sdma, rlc), stops MC access, then
 * triggers a pci config reset and polls CONFIG_MEMSIZE until the asic
 * responds again.  On APUs the GMCON state is saved/restored around
 * the reset.  The ordering of the halt steps below is deliberate.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs: park the GMCON engine state across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as all
	 * ones while the device is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5233
5234 /**
5235  * cik_asic_reset - soft reset GPU
5236  *
5237  * @rdev: radeon_device pointer
5238  * @hard: force hard reset
5239  *
5240  * Look up which blocks are hung and attempt
5241  * to reset them.
5242  * Returns 0 for success.
5243  */
5244 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5245 {
5246         u32 reset_mask;
5247
5248         if (hard) {
5249                 cik_gpu_pci_config_reset(rdev);
5250                 return 0;
5251         }
5252
5253         reset_mask = cik_gpu_check_soft_reset(rdev);
5254
5255         if (reset_mask)
5256                 r600_set_bios_scratch_engine_hung(rdev, true);
5257
5258         /* try soft reset */
5259         cik_gpu_soft_reset(rdev, reset_mask);
5260
5261         reset_mask = cik_gpu_check_soft_reset(rdev);
5262
5263         /* try pci config reset */
5264         if (reset_mask && radeon_hard_reset)
5265                 cik_gpu_pci_config_reset(rdev);
5266
5267         reset_mask = cik_gpu_check_soft_reset(rdev);
5268
5269         if (!reset_mask)
5270                 r600_set_bios_scratch_engine_hung(rdev, false);
5271
5272         return 0;
5273 }
5274
5275 /**
5276  * cik_gfx_is_lockup - check if the 3D engine is locked up
5277  *
5278  * @rdev: radeon_device pointer
5279  * @ring: radeon_ring structure holding ring information
5280  *
5281  * Check if the 3D engine is locked up (CIK).
5282  * Returns true if the engine is locked, false if not.
5283  */
5284 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5285 {
5286         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5287
5288         if (!(reset_mask & (RADEON_RESET_GFX |
5289                             RADEON_RESET_COMPUTE |
5290                             RADEON_RESET_CP))) {
5291                 radeon_ring_lockup_update(rdev, ring);
5292                 return false;
5293         }
5294         return radeon_ring_test_lockup(rdev, ring);
5295 }
5296
5297 /* MC */
5298 /**
5299  * cik_mc_program - program the GPU memory controller
5300  *
5301  * @rdev: radeon_device pointer
5302  *
5303  * Set the location of vram, gart, and AGP in the GPU's
5304  * physical address space (CIK).
5305  */
5306 static void cik_mc_program(struct radeon_device *rdev)
5307 {
5308         struct evergreen_mc_save save;
5309         u32 tmp;
5310         int i, j;
5311
5312         /* Initialize HDP */
5313         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5314                 WREG32((0x2c14 + j), 0x00000000);
5315                 WREG32((0x2c18 + j), 0x00000000);
5316                 WREG32((0x2c1c + j), 0x00000000);
5317                 WREG32((0x2c20 + j), 0x00000000);
5318                 WREG32((0x2c24 + j), 0x00000000);
5319         }
5320         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5321
5322         evergreen_mc_stop(rdev, &save);
5323         if (radeon_mc_wait_for_idle(rdev)) {
5324                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5325         }
5326         /* Lockout access through VGA aperture*/
5327         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5328         /* Update configuration */
5329         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5330                rdev->mc.vram_start >> 12);
5331         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5332                rdev->mc.vram_end >> 12);
5333         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5334                rdev->vram_scratch.gpu_addr >> 12);
5335         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5336         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5337         WREG32(MC_VM_FB_LOCATION, tmp);
5338         /* XXX double check these! */
5339         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5340         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5341         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5342         WREG32(MC_VM_AGP_BASE, 0);
5343         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5344         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5345         if (radeon_mc_wait_for_idle(rdev)) {
5346                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5347         }
5348         evergreen_mc_resume(rdev, &save);
5349         /* we need to own VRAM, so turn off the VGA renderer here
5350          * to stop it overwriting our objects */
5351         rv515_vga_render_disable(rdev);
5352 }
5353
5354 /**
5355  * cik_mc_init - initialize the memory controller driver params
5356  *
5357  * @rdev: radeon_device pointer
5358  *
5359  * Look up the amount of vram, vram width, and decide how to place
5360  * vram and gart within the GPU's physical address space (CIK).
5361  * Returns 0 for success.
5362  */
5363 static int cik_mc_init(struct radeon_device *rdev)
5364 {
5365         u32 tmp;
5366         int chansize, numchan;
5367
5368         /* Get VRAM informations */
5369         rdev->mc.vram_is_ddr = true;
5370         tmp = RREG32(MC_ARB_RAMCFG);
5371         if (tmp & CHANSIZE_MASK) {
5372                 chansize = 64;
5373         } else {
5374                 chansize = 32;
5375         }
5376         tmp = RREG32(MC_SHARED_CHMAP);
5377         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5378         case 0:
5379         default:
5380                 numchan = 1;
5381                 break;
5382         case 1:
5383                 numchan = 2;
5384                 break;
5385         case 2:
5386                 numchan = 4;
5387                 break;
5388         case 3:
5389                 numchan = 8;
5390                 break;
5391         case 4:
5392                 numchan = 3;
5393                 break;
5394         case 5:
5395                 numchan = 6;
5396                 break;
5397         case 6:
5398                 numchan = 10;
5399                 break;
5400         case 7:
5401                 numchan = 12;
5402                 break;
5403         case 8:
5404                 numchan = 16;
5405                 break;
5406         }
5407         rdev->mc.vram_width = numchan * chansize;
5408         /* Could aper size report 0 ? */
5409         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5410         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5411         /* size in MB on si */
5412         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5413         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5414         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5415         si_vram_gtt_location(rdev, &rdev->mc);
5416         radeon_update_bandwidth_info(rdev);
5417
5418         return 0;
5419 }
5420
5421 /*
5422  * GART
5423  * VMID 0 is the physical GPU addresses as used by the kernel.
5424  * VMIDs 1-15 are used for userspace clients and are handled
5425  * by the radeon vm/hsa code.
5426  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 requests an
	 * invalidate for VM context 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5442
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: kernel GART range, faults fall back to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw register offsets; purpose not visible here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 0-7 and 8-15 have separate base-address register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	/* program the SH_MEM/SDMA virtual address regs for each VMID */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5563
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK), after saving the
 * per-context page table base addresses so that
 * cik_pcie_gart_enable() can restore them later.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* remember each context's page table base for re-enable */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5602
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the hw,
 * frees the page table VRAM, then releases the gart structures.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5616
5617 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5630
5631 /*
5632  * vm
5633  * VMID 0 is the physical GPU addresses as used by the kernel.
5634  * VMIDs 1-15 are used for userspace clients and are handled
5635  * by the radeon vm/hsa code.
5636  */
5637 /**
5638  * cik_vm_init - cik vm init callback
5639  *
5640  * @rdev: radeon_device pointer
5641  *
5642  * Inits cik specific vm parameters (number of VMs, base of vram for
5643  * VMIDs 1-15) (CIK).
5644  * Returns 0 for success.
5645  */
5646 int cik_vm_init(struct radeon_device *rdev)
5647 {
5648         /*
5649          * number of VMs
5650          * VMID 0 is reserved for System
5651          * radeon graphics/compute will use VMIDs 1-15
5652          */
5653         rdev->vm_manager.nvm = 16;
5654         /* base offset of vram pages */
5655         if (rdev->flags & RADEON_IS_IGP) {
5656                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5657                 tmp <<= 22;
5658                 rdev->vm_manager.vram_base_offset = tmp;
5659         } else
5660                 rdev->vm_manager.vram_base_offset = 0;
5661
5662         return 0;
5663 }
5664
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK; kept as the asic callback slot.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5675
5676 /**
5677  * cik_vm_decode_fault - print human readable fault info
5678  *
5679  * @rdev: radeon_device pointer
5680  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5681  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5682  *
5683  * Print human readable fault information (CIK).
5684  */
5685 static void cik_vm_decode_fault(struct radeon_device *rdev,
5686                                 u32 status, u32 addr, u32 mc_client)
5687 {
5688         u32 mc_id;
5689         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5690         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5691         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5692                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5693
5694         if (rdev->family == CHIP_HAWAII)
5695                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5696         else
5697                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5698
5699         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5700                protections, vmid, addr,
5701                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5702                block, mc_client, mc_id);
5703 }
5704
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is updated and TLB flushed
 * @pd_addr: physical address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).  The packet order below is the contract:
 * write PT base, set SH_MEM regs for @vm_id, flush HDP, request
 * the invalidate, then poll until it completes.
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine; compute writes via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update this VMID's page table base address register */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* contexts 0-7 and 8-15 live in separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select @vm_id in SRBM_GFX_CNTL so the SH_MEM writes hit it */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* four consecutive SH_MEM registers starting at SH_MEM_BASES */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5786
5787 /*
5788  * RLC
5789  * The RLC is a multi-purpose microengine that handles a
5790  * variety of functions, the most important of which is
5791  * the interrupt controller.
5792  */
5793 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5794                                           bool enable)
5795 {
5796         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5797
5798         if (enable)
5799                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5800         else
5801                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5802         WREG32(CP_INT_CNTL_RING0, tmp);
5803 }
5804
5805 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5806 {
5807         u32 tmp;
5808
5809         tmp = RREG32(RLC_LB_CNTL);
5810         if (enable)
5811                 tmp |= LOAD_BALANCE_ENABLE;
5812         else
5813                 tmp &= ~LOAD_BALANCE_ENABLE;
5814         WREG32(RLC_LB_CNTL, tmp);
5815 }
5816
/* Poll until the RLC serdes masters go idle: first the CU masters on
 * every shader engine / shader array, then the non-CU (GC/TC) masters.
 * Each poll is bounded by rdev->usec_timeout; timeouts are silent.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* grbm_idx_mutex guards the SE/SH selection register */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SEs/SHs) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5843
5844 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5845 {
5846         u32 tmp;
5847
5848         tmp = RREG32(RLC_CNTL);
5849         if (tmp != rlc)
5850                 WREG32(RLC_CNTL, rlc);
5851 }
5852
5853 static u32 cik_halt_rlc(struct radeon_device *rdev)
5854 {
5855         u32 data, orig;
5856
5857         orig = data = RREG32(RLC_CNTL);
5858
5859         if (data & RLC_ENABLE) {
5860                 u32 i;
5861
5862                 data &= ~RLC_ENABLE;
5863                 WREG32(RLC_CNTL, data);
5864
5865                 for (i = 0; i < rdev->usec_timeout; i++) {
5866                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5867                                 break;
5868                         udelay(1);
5869                 }
5870
5871                 cik_wait_for_rlc_serdes(rdev);
5872         }
5873
5874         return orig;
5875 }
5876
5877 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5878 {
5879         u32 tmp, i, mask;
5880
5881         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5882         WREG32(RLC_GPR_REG2, tmp);
5883
5884         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5885         for (i = 0; i < rdev->usec_timeout; i++) {
5886                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5887                         break;
5888                 udelay(1);
5889         }
5890
5891         for (i = 0; i < rdev->usec_timeout; i++) {
5892                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5893                         break;
5894                 udelay(1);
5895         }
5896 }
5897
5898 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5899 {
5900         u32 tmp;
5901
5902         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5903         WREG32(RLC_GPR_REG2, tmp);
5904 }
5905
5906 /**
5907  * cik_rlc_stop - stop the RLC ME
5908  *
5909  * @rdev: radeon_device pointer
5910  *
5911  * Halt the RLC ME (MicroEngine) (CIK).
5912  */
5913 static void cik_rlc_stop(struct radeon_device *rdev)
5914 {
5915         WREG32(RLC_CNTL, 0);
5916
5917         cik_enable_gui_idle_interrupt(rdev, false);
5918
5919         cik_wait_for_rlc_serdes(rdev);
5920 }
5921
5922 /**
5923  * cik_rlc_start - start the RLC ME
5924  *
5925  * @rdev: radeon_device pointer
5926  *
5927  * Unhalt the RLC ME (MicroEngine) (CIK).
5928  */
5929 static void cik_rlc_start(struct radeon_device *rdev)
5930 {
5931         WREG32(RLC_CNTL, RLC_ENABLE);
5932
5933         cik_enable_gui_idle_interrupt(rdev, true);
5934
5935         udelay(50);
5936 }
5937
5938 /**
5939  * cik_rlc_resume - setup the RLC hw
5940  *
5941  * @rdev: radeon_device pointer
5942  *
5943  * Initialize the RLC registers, load the ucode,
5944  * and start the RLC (CIK).
5945  * Returns 0 for success, -EINVAL if the ucode is not available.
5946  */
5947 static int cik_rlc_resume(struct radeon_device *rdev)
5948 {
5949         u32 i, size, tmp;
5950
5951         if (!rdev->rlc_fw)
5952                 return -EINVAL;
5953
5954         cik_rlc_stop(rdev);
5955
5956         /* disable CG */
5957         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5958         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5959
5960         si_rlc_reset(rdev);
5961
5962         cik_init_pg(rdev);
5963
5964         cik_init_cg(rdev);
5965
5966         WREG32(RLC_LB_CNTR_INIT, 0);
5967         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5968
5969         mutex_lock(&rdev->grbm_idx_mutex);
5970         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5971         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5972         WREG32(RLC_LB_PARAMS, 0x00600408);
5973         WREG32(RLC_LB_CNTL, 0x80000004);
5974         mutex_unlock(&rdev->grbm_idx_mutex);
5975
5976         WREG32(RLC_MC_CNTL, 0);
5977         WREG32(RLC_UCODE_CNTL, 0);
5978
5979         if (rdev->new_fw) {
5980                 const struct rlc_firmware_header_v1_0 *hdr =
5981                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5982                 const __le32 *fw_data = (const __le32 *)
5983                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5984
5985                 radeon_ucode_print_rlc_hdr(&hdr->header);
5986
5987                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5988                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5989                 for (i = 0; i < size; i++)
5990                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5991                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5992         } else {
5993                 const __be32 *fw_data;
5994
5995                 switch (rdev->family) {
5996                 case CHIP_BONAIRE:
5997                 case CHIP_HAWAII:
5998                 default:
5999                         size = BONAIRE_RLC_UCODE_SIZE;
6000                         break;
6001                 case CHIP_KAVERI:
6002                         size = KV_RLC_UCODE_SIZE;
6003                         break;
6004                 case CHIP_KABINI:
6005                         size = KB_RLC_UCODE_SIZE;
6006                         break;
6007                 case CHIP_MULLINS:
6008                         size = ML_RLC_UCODE_SIZE;
6009                         break;
6010                 }
6011
6012                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6013                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6014                 for (i = 0; i < size; i++)
6015                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6016                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6017         }
6018
6019         /* XXX - find out what chips support lbpw */
6020         cik_enable_lbpw(rdev, false);
6021
6022         if (rdev->family == CHIP_BONAIRE)
6023                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6024
6025         cik_rlc_start(rdev);
6026
6027         return 0;
6028 }
6029
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block.  Enabling requires
 * RADEON_CG_SUPPORT_GFX_CGCG in rdev->cg_flags; the RLC is halted
 * while the serdes write masks/ctrl are programmed and then restored.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC_CNTL value saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): four dummy reads with discarded results —
                 * presumably posting/settling reads before disabling CGCG;
                 * confirm against the hardware programming docs. */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6067
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * along with the related CP memory light sleep and CGTS shader-memory
 * gating, all gated on the corresponding rdev->cg_flags bits.  Both
 * paths halt the RLC while reprogramming the serdes write control and
 * restore it afterwards.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC_CNTL value saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6151
/* Memory-controller clock-gating control registers.  Walked by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below, which toggle the
 * MC_LS_ENABLE / MC_CG_ENABLE bits in each one.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6164
6165 static void cik_enable_mc_ls(struct radeon_device *rdev,
6166                              bool enable)
6167 {
6168         int i;
6169         u32 orig, data;
6170
6171         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6172                 orig = data = RREG32(mc_cg_registers[i]);
6173                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6174                         data |= MC_LS_ENABLE;
6175                 else
6176                         data &= ~MC_LS_ENABLE;
6177                 if (data != orig)
6178                         WREG32(mc_cg_registers[i], data);
6179         }
6180 }
6181
6182 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6183                                bool enable)
6184 {
6185         int i;
6186         u32 orig, data;
6187
6188         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6189                 orig = data = RREG32(mc_cg_registers[i]);
6190                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6191                         data |= MC_CG_ENABLE;
6192                 else
6193                         data &= ~MC_CG_ENABLE;
6194                 if (data != orig)
6195                         WREG32(mc_cg_registers[i], data);
6196         }
6197 }
6198
6199 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6200                                  bool enable)
6201 {
6202         u32 orig, data;
6203
6204         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6205                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6206                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6207         } else {
6208                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6209                 data |= 0xff000000;
6210                 if (data != orig)
6211                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6212
6213                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6214                 data |= 0xff000000;
6215                 if (data != orig)
6216                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6217         }
6218 }
6219
6220 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6221                                  bool enable)
6222 {
6223         u32 orig, data;
6224
6225         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6226                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6227                 data |= 0x100;
6228                 if (orig != data)
6229                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6230
6231                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6232                 data |= 0x100;
6233                 if (orig != data)
6234                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6235         } else {
6236                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6237                 data &= ~0x100;
6238                 if (orig != data)
6239                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6240
6241                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6242                 data &= ~0x100;
6243                 if (orig != data)
6244                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6245         }
6246 }
6247
/* Toggle UVD medium-grain clock gating: program the UVD CGC memory
 * control bits (low 12 bits) and the DCM bit in UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                /* NOTE(review): the read above is discarded — the register is
                 * unconditionally set to 0xfff.  It mirrors the ~0xfff clear
                 * in the disable path, so likely intentional, but confirm
                 * whether other bits in UVD_CGC_MEM_CTRL should be kept. */
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6273
6274 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6275                                bool enable)
6276 {
6277         u32 orig, data;
6278
6279         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6280
6281         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6282                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6283                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6284         else
6285                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6286                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6287
6288         if (orig != data)
6289                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6290 }
6291
6292 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6293                                 bool enable)
6294 {
6295         u32 orig, data;
6296
6297         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6298
6299         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6300                 data &= ~CLOCK_GATING_DIS;
6301         else
6302                 data |= CLOCK_GATING_DIS;
6303
6304         if (orig != data)
6305                 WREG32(HDP_HOST_PATH_CNTL, data);
6306 }
6307
6308 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6309                               bool enable)
6310 {
6311         u32 orig, data;
6312
6313         orig = data = RREG32(HDP_MEM_POWER_LS);
6314
6315         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6316                 data |= HDP_LS_ENABLE;
6317         else
6318                 data &= ~HDP_LS_ENABLE;
6319
6320         if (orig != data)
6321                 WREG32(HDP_MEM_POWER_LS, data);
6322 }
6323
/* Enable/disable clock gating for the blocks selected by @block (a mask
 * of RADEON_CG_BLOCK_* flags).  For GFX, MGCG must be enabled before
 * CGCG and disabled after it; GUI idle interrupts are masked around the
 * transition.  MC gating is skipped on IGP parts and UVD gating is
 * skipped when the part has no UVD.
 */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_VCE) {
                vce_v2_0_enable_mgcg(rdev, enable);
        }
}
6371
/* Enable clock gating at init: GFX first, then UVD internal CG (when
 * present), then the remaining blocks in one call.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
6386
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): the non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6397
6398 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6399                                           bool enable)
6400 {
6401         u32 data, orig;
6402
6403         orig = data = RREG32(RLC_PG_CNTL);
6404         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6405                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6406         else
6407                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6408         if (orig != data)
6409                 WREG32(RLC_PG_CNTL, data);
6410 }
6411
6412 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6413                                           bool enable)
6414 {
6415         u32 data, orig;
6416
6417         orig = data = RREG32(RLC_PG_CNTL);
6418         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6419                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6420         else
6421                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6422         if (orig != data)
6423                 WREG32(RLC_PG_CNTL, data);
6424 }
6425
6426 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6427 {
6428         u32 data, orig;
6429
6430         orig = data = RREG32(RLC_PG_CNTL);
6431         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6432                 data &= ~DISABLE_CP_PG;
6433         else
6434                 data |= DISABLE_CP_PG;
6435         if (orig != data)
6436                 WREG32(RLC_PG_CNTL, data);
6437 }
6438
6439 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6440 {
6441         u32 data, orig;
6442
6443         orig = data = RREG32(RLC_PG_CNTL);
6444         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6445                 data &= ~DISABLE_GDS_PG;
6446         else
6447                 data |= DISABLE_GDS_PG;
6448         if (orig != data)
6449                 WREG32(RLC_PG_CNTL, data);
6450 }
6451
/* Size and offsets (in dwords) of the CP jump tables inside the legacy
 * (non-new_fw) ucode images, used by cik_init_cp_pg_table() below. */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6455
/* Copy the CP jump tables out of the CE/PFP/ME/MEC (and MEC2 on Kaveri)
 * firmware images into the RLC's CP table buffer (cp_table_ptr), packed
 * back-to-back in little-endian dwords.  New-style firmware carries the
 * table offset/size in its header; legacy firmware uses the fixed
 * CP_*_TABLE_* constants above.  No-op if the buffer is not mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        /* Kaveri additionally has a MEC2 table (me == 4) */
        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        /* me selects the firmware image: 0=CE, 1=PFP, 2=ME,
                         * 3=MEC, 4=MEC2 */
                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy firmware: big-endian data at fixed offsets */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6539
/* Toggle GFX coarse-grain power gating: set/clear GFX_PG_ENABLE in
 * RLC_PG_CNTL and AUTO_PG_EN in RLC_AUTO_PG_CTRL (requires
 * RADEON_PG_SUPPORT_GFX_PG when enabling).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): result discarded — presumably a posting read
                 * to make sure PG is really off before rendering resumes;
                 * confirm against the programming docs. */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6569
6570 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6571 {
6572         u32 mask = 0, tmp, tmp1;
6573         int i;
6574
6575         mutex_lock(&rdev->grbm_idx_mutex);
6576         cik_select_se_sh(rdev, se, sh);
6577         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6578         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6579         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6580         mutex_unlock(&rdev->grbm_idx_mutex);
6581
6582         tmp &= 0xffff0000;
6583
6584         tmp |= tmp1;
6585         tmp >>= 16;
6586
6587         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6588                 mask <<= 1;
6589                 mask |= 1;
6590         }
6591
6592         return (~tmp) & mask;
6593 }
6594
6595 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6596 {
6597         u32 i, j, k, active_cu_number = 0;
6598         u32 mask, counter, cu_bitmap;
6599         u32 tmp = 0;
6600
6601         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6602                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6603                         mask = 1;
6604                         cu_bitmap = 0;
6605                         counter = 0;
6606                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6607                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6608                                         if (counter < 2)
6609                                                 cu_bitmap |= mask;
6610                                         counter ++;
6611                                 }
6612                                 mask <<= 1;
6613                         }
6614
6615                         active_cu_number += counter;
6616                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6617                 }
6618         }
6619
6620         WREG32(RLC_PG_AO_CU_MASK, tmp);
6621
6622         tmp = RREG32(RLC_MAX_PG_CU);
6623         tmp &= ~MAX_PU_CU_MASK;
6624         tmp |= MAX_PU_CU(active_cu_number);
6625         WREG32(RLC_MAX_PG_CU, tmp);
6626 }
6627
6628 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6629                                        bool enable)
6630 {
6631         u32 data, orig;
6632
6633         orig = data = RREG32(RLC_PG_CNTL);
6634         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6635                 data |= STATIC_PER_CU_PG_ENABLE;
6636         else
6637                 data &= ~STATIC_PER_CU_PG_ENABLE;
6638         if (orig != data)
6639                 WREG32(RLC_PG_CNTL, data);
6640 }
6641
6642 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6643                                         bool enable)
6644 {
6645         u32 data, orig;
6646
6647         orig = data = RREG32(RLC_PG_CNTL);
6648         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6649                 data |= DYN_PER_CU_PG_ENABLE;
6650         else
6651                 data &= ~DYN_PER_CU_PG_ENABLE;
6652         if (orig != data)
6653                 WREG32(RLC_PG_CNTL, data);
6654 }
6655
6656 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6657 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6658
/* Program the RLC with the clear-state descriptor, the save/restore
 * register list and the power-gating delay/threshold parameters used
 * by gfx coarse-grain power gating.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, buffer size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state data: zero all three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list into RLC scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* set GFX_PG_SRC, writing the register only if the bit changed */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* GPU addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	/* update only the IDLE_POLL_COUNT field of CP_RB_WPTR_POLL_CNTL */
	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* only the low byte of RLC_PG_DELAY_2 is programmed here */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* update only the GRBM_REG_SGIT field of RLC_AUTO_PG_CTRL */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6707
/* Enable or disable all gfx power-gating variants in one call:
 * coarse grain, static medium grain and dynamic medium grain.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6714
6715 u32 cik_get_csb_size(struct radeon_device *rdev)
6716 {
6717         u32 count = 0;
6718         const struct cs_section_def *sect = NULL;
6719         const struct cs_extent_def *ext = NULL;
6720
6721         if (rdev->rlc.cs_data == NULL)
6722                 return 0;
6723
6724         /* begin clear state */
6725         count += 2;
6726         /* context control state */
6727         count += 3;
6728
6729         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6730                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6731                         if (sect->id == SECT_CONTEXT)
6732                                 count += 2 + ext->reg_count;
6733                         else
6734                                 return 0;
6735                 }
6736         }
6737         /* pa_sc_raster_config/pa_sc_raster_config1 */
6738         count += 4;
6739         /* end clear state */
6740         count += 2;
6741         /* clear state */
6742         count += 2;
6743
6744         return count;
6745 }
6746
/* Fill @buffer with the clear-state PM4 packet stream described by
 * rdev->rlc.cs_data; the required size comes from cik_get_csb_size().
 * Returns silently on NULL cs_data/buffer or on a non-context section.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is a dword offset; 0xa000 is the context reg base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-family values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6811
/* Initialize and enable power gating according to rdev->pg_flags.
 * Does nothing when no PG flags are set.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		/* enable SCLK slowdown on both power-up and power-down */
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* gfx PG requires the RLC CGPG setup plus CP/GDS PG */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		/* program the always-on CU mask before enabling gfx PG */
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6826
/* Tear down power gating: disable gfx PG first, then CP/GDS PG
 * if they were enabled. Mirrors cik_init_pg().
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6837
6838 /*
6839  * Interrupts
6840  * Starting with r6xx, interrupts are handled via a ring buffer.
6841  * Ring buffers are areas of GPU accessible memory that the GPU
6842  * writes interrupt vectors into and the host reads vectors out of.
6843  * There is a rptr (read pointer) that determines where the
6844  * host is currently reading, and a wptr (write pointer)
6845  * which determines where the GPU has written.  When the
6846  * pointers are equal, the ring is idle.  When the GPU
6847  * writes vectors to the ring buffer, it increments the
6848  * wptr.  When there is an interrupt, the host then starts
6849  * fetching commands and processing them until the pointers are
6850  * equal again at which point it updates the rptr.
6851  */
6852
6853 /**
6854  * cik_enable_interrupts - Enable the interrupt ring buffer
6855  *
6856  * @rdev: radeon_device pointer
6857  *
6858  * Enable the interrupt ring buffer (CIK).
6859  */
6860 static void cik_enable_interrupts(struct radeon_device *rdev)
6861 {
6862         u32 ih_cntl = RREG32(IH_CNTL);
6863         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6864
6865         ih_cntl |= ENABLE_INTR;
6866         ih_rb_cntl |= IH_RB_ENABLE;
6867         WREG32(IH_CNTL, ih_cntl);
6868         WREG32(IH_RB_CNTL, ih_rb_cntl);
6869         rdev->ih.enabled = true;
6870 }
6871
6872 /**
6873  * cik_disable_interrupts - Disable the interrupt ring buffer
6874  *
6875  * @rdev: radeon_device pointer
6876  *
6877  * Disable the interrupt ring buffer (CIK).
6878  */
6879 static void cik_disable_interrupts(struct radeon_device *rdev)
6880 {
6881         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6882         u32 ih_cntl = RREG32(IH_CNTL);
6883
6884         ih_rb_cntl &= ~IH_RB_ENABLE;
6885         ih_cntl &= ~ENABLE_INTR;
6886         WREG32(IH_RB_CNTL, ih_rb_cntl);
6887         WREG32(IH_CNTL, ih_cntl);
6888         /* set rptr, wptr to 0 */
6889         WREG32(IH_RB_RPTR, 0);
6890         WREG32(IH_RB_WPTR, 0);
6891         rdev->ih.enabled = false;
6892         rdev->ih.rptr = 0;
6893 }
6894
6895 /**
6896  * cik_disable_interrupt_state - Disable all interrupt sources
6897  *
6898  * @rdev: radeon_device pointer
6899  *
6900  * Clear all interrupt enable bits used by the driver (CIK).
6901  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes of both MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve only the polarity bit on each pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6971
6972 /**
6973  * cik_irq_init - init and enable the interrupt ring
6974  *
6975  * @rdev: radeon_device pointer
6976  *
6977  * Allocate a ring buffer for the interrupt controller,
6978  * enable the RLC, disable interrupts, enable the IH
6979  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6981  * Returns 0 for success, errors for failure.
6982  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is programmed in units of 256 bytes */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7053
7054 /**
7055  * cik_irq_set - enable/disable interrupt sources
7056  *
7057  * @rdev: radeon_device pointer
7058  *
7059  * Enable interrupt sources on the GPU (vblanks, hpd,
7060  * etc.) (CIK).
7061  * Returns 0 for success, errors for failure.
7062  */
7063 int cik_irq_set(struct radeon_device *rdev)
7064 {
7065         u32 cp_int_cntl;
7066         u32 cp_m1p0;
7067         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7068         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7069         u32 grbm_int_cntl = 0;
7070         u32 dma_cntl, dma_cntl1;
7071
7072         if (!rdev->irq.installed) {
7073                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7074                 return -EINVAL;
7075         }
7076         /* don't enable anything if the ih is disabled */
7077         if (!rdev->ih.enabled) {
7078                 cik_disable_interrupts(rdev);
7079                 /* force the active interrupt state to all disabled */
7080                 cik_disable_interrupt_state(rdev);
7081                 return 0;
7082         }
7083
7084         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7085                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7086         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7087
7088         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7089         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7090         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7091         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7092         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7093         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7094
7095         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7096         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7097
7098         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7099
7100         /* enable CP interrupts on all rings */
7101         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7102                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7103                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7104         }
7105         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7106                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7107                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7108                 if (ring->me == 1) {
7109                         switch (ring->pipe) {
7110                         case 0:
7111                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7112                                 break;
7113                         default:
7114                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7115                                 break;
7116                         }
7117                 } else {
7118                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7119                 }
7120         }
7121         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7122                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7123                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7124                 if (ring->me == 1) {
7125                         switch (ring->pipe) {
7126                         case 0:
7127                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7128                                 break;
7129                         default:
7130                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7131                                 break;
7132                         }
7133                 } else {
7134                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7135                 }
7136         }
7137
7138         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7139                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7140                 dma_cntl |= TRAP_ENABLE;
7141         }
7142
7143         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7144                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7145                 dma_cntl1 |= TRAP_ENABLE;
7146         }
7147
7148         if (rdev->irq.crtc_vblank_int[0] ||
7149             atomic_read(&rdev->irq.pflip[0])) {
7150                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7151                 crtc1 |= VBLANK_INTERRUPT_MASK;
7152         }
7153         if (rdev->irq.crtc_vblank_int[1] ||
7154             atomic_read(&rdev->irq.pflip[1])) {
7155                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7156                 crtc2 |= VBLANK_INTERRUPT_MASK;
7157         }
7158         if (rdev->irq.crtc_vblank_int[2] ||
7159             atomic_read(&rdev->irq.pflip[2])) {
7160                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7161                 crtc3 |= VBLANK_INTERRUPT_MASK;
7162         }
7163         if (rdev->irq.crtc_vblank_int[3] ||
7164             atomic_read(&rdev->irq.pflip[3])) {
7165                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7166                 crtc4 |= VBLANK_INTERRUPT_MASK;
7167         }
7168         if (rdev->irq.crtc_vblank_int[4] ||
7169             atomic_read(&rdev->irq.pflip[4])) {
7170                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7171                 crtc5 |= VBLANK_INTERRUPT_MASK;
7172         }
7173         if (rdev->irq.crtc_vblank_int[5] ||
7174             atomic_read(&rdev->irq.pflip[5])) {
7175                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7176                 crtc6 |= VBLANK_INTERRUPT_MASK;
7177         }
7178         if (rdev->irq.hpd[0]) {
7179                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7180                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7181         }
7182         if (rdev->irq.hpd[1]) {
7183                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7184                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7185         }
7186         if (rdev->irq.hpd[2]) {
7187                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7188                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7189         }
7190         if (rdev->irq.hpd[3]) {
7191                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7192                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7193         }
7194         if (rdev->irq.hpd[4]) {
7195                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7196                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7197         }
7198         if (rdev->irq.hpd[5]) {
7199                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7200                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7201         }
7202
7203         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7204
7205         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7206         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7207
7208         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7209
7210         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7211
7212         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7213         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7214         if (rdev->num_crtc >= 4) {
7215                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7216                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7217         }
7218         if (rdev->num_crtc >= 6) {
7219                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7220                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7221         }
7222
7223         if (rdev->num_crtc >= 2) {
7224                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7225                        GRPH_PFLIP_INT_MASK);
7226                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7227                        GRPH_PFLIP_INT_MASK);
7228         }
7229         if (rdev->num_crtc >= 4) {
7230                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7231                        GRPH_PFLIP_INT_MASK);
7232                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7233                        GRPH_PFLIP_INT_MASK);
7234         }
7235         if (rdev->num_crtc >= 6) {
7236                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7237                        GRPH_PFLIP_INT_MASK);
7238                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7239                        GRPH_PFLIP_INT_MASK);
7240         }
7241
7242         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7243         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7244         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7245         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7246         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7247         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7248
7249         /* posting read */
7250         RREG32(SRBM_STATUS);
7251
7252         return 0;
7253 }
7254
7255 /**
7256  * cik_irq_ack - ack interrupt sources
7257  *
7258  * @rdev: radeon_device pointer
7259  *
7260  * Ack interrupt sources on the GPU (vblanks, hpd,
7261  * etc.) (CIK).  Certain interrupts sources are sw
7262  * generated and do not require an explicit ack.
7263  */
7264 static inline void cik_irq_ack(struct radeon_device *rdev)
7265 {
7266         u32 tmp;
7267
7268         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7269         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7270         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7271         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7272         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7273         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7274         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7275
7276         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7277                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7278         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7279                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7280         if (rdev->num_crtc >= 4) {
7281                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7282                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7283                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7284                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7285         }
7286         if (rdev->num_crtc >= 6) {
7287                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7288                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7289                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7290                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7291         }
7292
7293         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7294                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7295                        GRPH_PFLIP_INT_CLEAR);
7296         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7297                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7298                        GRPH_PFLIP_INT_CLEAR);
7299         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7300                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7301         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7302                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7303         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7304                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7305         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7306                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7307
7308         if (rdev->num_crtc >= 4) {
7309                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7310                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7311                                GRPH_PFLIP_INT_CLEAR);
7312                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7313                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7314                                GRPH_PFLIP_INT_CLEAR);
7315                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7316                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7317                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7318                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7319                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7320                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7321                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7322                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7323         }
7324
7325         if (rdev->num_crtc >= 6) {
7326                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7327                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7328                                GRPH_PFLIP_INT_CLEAR);
7329                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7330                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7331                                GRPH_PFLIP_INT_CLEAR);
7332                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7333                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7334                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7335                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7336                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7337                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7338                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7339                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7340         }
7341
7342         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7343                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7344                 tmp |= DC_HPDx_INT_ACK;
7345                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7346         }
7347         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7348                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7349                 tmp |= DC_HPDx_INT_ACK;
7350                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7351         }
7352         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7353                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7354                 tmp |= DC_HPDx_INT_ACK;
7355                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7356         }
7357         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7358                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7359                 tmp |= DC_HPDx_INT_ACK;
7360                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7361         }
7362         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7363                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7364                 tmp |= DC_HPDx_INT_ACK;
7365                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7366         }
7367         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7368                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7369                 tmp |= DC_HPDx_INT_ACK;
7370                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7371         }
7372         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7373                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7374                 tmp |= DC_HPDx_RX_INT_ACK;
7375                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7376         }
7377         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7378                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7379                 tmp |= DC_HPDx_RX_INT_ACK;
7380                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7381         }
7382         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7383                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7384                 tmp |= DC_HPDx_RX_INT_ACK;
7385                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7386         }
7387         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7388                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7389                 tmp |= DC_HPDx_RX_INT_ACK;
7390                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7391         }
7392         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7393                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7394                 tmp |= DC_HPDx_RX_INT_ACK;
7395                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7396         }
7397         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7398                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7399                 tmp |= DC_HPDx_RX_INT_ACK;
7400                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7401         }
7402 }
7403
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Masks interrupt generation first, then acknowledges and clears any
 * state that was already pending so nothing fires after re-enable.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait for in-flight interrupts to settle, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);
	/* clear the per-source enable state last, with sources already masked */
	cik_disable_interrupt_state(rdev);
}
7419
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* full interrupt teardown first, then halt the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7433
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware before releasing the IH ring memory */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7448
7449 /**
7450  * cik_get_ih_wptr - get the IH ring buffer wptr
7451  *
7452  * @rdev: radeon_device pointer
7453  *
7454  * Get the IH ring buffer wptr from either the register
7455  * or the writeback memory buffer (CIK).  Also check for
7456  * ring buffer overflow and deal with it.
7457  * Used by cik_irq_process().
7458  * Returns the value of the wptr.
7459  */
7460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7461 {
7462         u32 wptr, tmp;
7463
7464         if (rdev->wb.enabled)
7465                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7466         else
7467                 wptr = RREG32(IH_RB_WPTR);
7468
7469         if (wptr & RB_OVERFLOW) {
7470                 wptr &= ~RB_OVERFLOW;
7471                 /* When a ring buffer overflow happen start parsing interrupt
7472                  * from the last not overwritten vector (wptr + 16). Hopefully
7473                  * this should allow us to catchup.
7474                  */
7475                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7476                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7477                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7478                 tmp = RREG32(IH_RB_CNTL);
7479                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7480                 WREG32(IH_RB_CNTL, tmp);
7481         }
7482         return (wptr & rdev->ih.ptr_mask);
7483 }
7484
7485 /*        CIK IV Ring
7486  * Each IV ring entry is 128 bits:
7487  * [7:0]    - interrupt source id
7488  * [31:8]   - reserved
7489  * [59:32]  - interrupt source data
7490  * [63:60]  - reserved
7491  * [71:64]  - RINGID
7492  *            CP:
7493  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7494  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7495  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7496  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7497  *            PIPE_ID - ME0 0=3D
7498  *                    - ME1&2 compute dispatcher (4 pipes each)
7499  *            SDMA:
7500  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7501  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7502  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7503  * [79:72]  - VMID
7504  * [95:80]  - PASID
7505  * [127:96] - reserved
7506  */
7507 /**
7508  * cik_irq_process - interrupt handler
7509  *
7510  * @rdev: radeon_device pointer
7511  *
7512  * Interrupt handler (CIK).  Walk the IH ring,
7513  * ack interrupts and schedule work to handle
7514  * interrupt events.
7515  * Returns irq process return code.
7516  */
7517 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7518 {
7519         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7520         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7521         u32 wptr;
7522         u32 rptr;
7523         u32 src_id, src_data, ring_id;
7524         u8 me_id, pipe_id, queue_id;
7525         u32 ring_index;
7526         bool queue_hotplug = false;
7527         bool queue_dp = false;
7528         bool queue_reset = false;
7529         u32 addr, status, mc_client;
7530         bool queue_thermal = false;
7531
7532         if (!rdev->ih.enabled || rdev->shutdown)
7533                 return IRQ_NONE;
7534
7535         wptr = cik_get_ih_wptr(rdev);
7536
7537 restart_ih:
7538         /* is somebody else already processing irqs? */
7539         if (atomic_xchg(&rdev->ih.lock, 1))
7540                 return IRQ_NONE;
7541
7542         rptr = rdev->ih.rptr;
7543         DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7544
7545         /* Order reading of wptr vs. reading of IH ring data */
7546         rmb();
7547
7548         /* display interrupts */
7549         cik_irq_ack(rdev);
7550
7551         while (rptr != wptr) {
7552                 /* wptr/rptr are in bytes! */
7553                 ring_index = rptr / 4;
7554
7555                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7556                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7557                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7558
7559                 switch (src_id) {
7560                 case 1: /* D1 vblank/vline */
7561                         switch (src_data) {
7562                         case 0: /* D1 vblank */
7563                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7564                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7565
7566                                 if (rdev->irq.crtc_vblank_int[0]) {
7567                                         drm_handle_vblank(rdev->ddev, 0);
7568                                         rdev->pm.vblank_sync = true;
7569                                         wake_up(&rdev->irq.vblank_queue);
7570                                 }
7571                                 if (atomic_read(&rdev->irq.pflip[0]))
7572                                         radeon_crtc_handle_vblank(rdev, 0);
7573                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7574                                 DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7575
7576                                 break;
7577                         case 1: /* D1 vline */
7578                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7579                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7580
7581                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7582                                 DRM_DEBUG_VBLANK("IH: D1 vline\n");
7583
7584                                 break;
7585                         default:
7586                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7587                                 break;
7588                         }
7589                         break;
7590                 case 2: /* D2 vblank/vline */
7591                         switch (src_data) {
7592                         case 0: /* D2 vblank */
7593                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7594                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7595
7596                                 if (rdev->irq.crtc_vblank_int[1]) {
7597                                         drm_handle_vblank(rdev->ddev, 1);
7598                                         rdev->pm.vblank_sync = true;
7599                                         wake_up(&rdev->irq.vblank_queue);
7600                                 }
7601                                 if (atomic_read(&rdev->irq.pflip[1]))
7602                                         radeon_crtc_handle_vblank(rdev, 1);
7603                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7604                                 DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7605
7606                                 break;
7607                         case 1: /* D2 vline */
7608                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7609                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7610
7611                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7612                                 DRM_DEBUG_VBLANK("IH: D2 vline\n");
7613
7614                                 break;
7615                         default:
7616                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7617                                 break;
7618                         }
7619                         break;
7620                 case 3: /* D3 vblank/vline */
7621                         switch (src_data) {
7622                         case 0: /* D3 vblank */
7623                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7624                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7625
7626                                 if (rdev->irq.crtc_vblank_int[2]) {
7627                                         drm_handle_vblank(rdev->ddev, 2);
7628                                         rdev->pm.vblank_sync = true;
7629                                         wake_up(&rdev->irq.vblank_queue);
7630                                 }
7631                                 if (atomic_read(&rdev->irq.pflip[2]))
7632                                         radeon_crtc_handle_vblank(rdev, 2);
7633                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7634                                 DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7635
7636                                 break;
7637                         case 1: /* D3 vline */
7638                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7639                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7640
7641                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7642                                 DRM_DEBUG_VBLANK("IH: D3 vline\n");
7643
7644                                 break;
7645                         default:
7646                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7647                                 break;
7648                         }
7649                         break;
7650                 case 4: /* D4 vblank/vline */
7651                         switch (src_data) {
7652                         case 0: /* D4 vblank */
7653                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7654                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7655
7656                                 if (rdev->irq.crtc_vblank_int[3]) {
7657                                         drm_handle_vblank(rdev->ddev, 3);
7658                                         rdev->pm.vblank_sync = true;
7659                                         wake_up(&rdev->irq.vblank_queue);
7660                                 }
7661                                 if (atomic_read(&rdev->irq.pflip[3]))
7662                                         radeon_crtc_handle_vblank(rdev, 3);
7663                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7664                                 DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7665
7666                                 break;
7667                         case 1: /* D4 vline */
7668                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7669                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7670
7671                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7672                                 DRM_DEBUG_VBLANK("IH: D4 vline\n");
7673
7674                                 break;
7675                         default:
7676                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7677                                 break;
7678                         }
7679                         break;
7680                 case 5: /* D5 vblank/vline */
7681                         switch (src_data) {
7682                         case 0: /* D5 vblank */
7683                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7684                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7685
7686                                 if (rdev->irq.crtc_vblank_int[4]) {
7687                                         drm_handle_vblank(rdev->ddev, 4);
7688                                         rdev->pm.vblank_sync = true;
7689                                         wake_up(&rdev->irq.vblank_queue);
7690                                 }
7691                                 if (atomic_read(&rdev->irq.pflip[4]))
7692                                         radeon_crtc_handle_vblank(rdev, 4);
7693                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7694                                 DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7695
7696                                 break;
7697                         case 1: /* D5 vline */
7698                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7699                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7700
7701                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7702                                 DRM_DEBUG("IH: D5 vline\n");
7703
7704                                 break;
7705                         default:
7706                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7707                                 break;
7708                         }
7709                         break;
7710                 case 6: /* D6 vblank/vline */
7711                         switch (src_data) {
7712                         case 0: /* D6 vblank */
7713                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7714                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7715
7716                                 if (rdev->irq.crtc_vblank_int[5]) {
7717                                         drm_handle_vblank(rdev->ddev, 5);
7718                                         rdev->pm.vblank_sync = true;
7719                                         wake_up(&rdev->irq.vblank_queue);
7720                                 }
7721                                 if (atomic_read(&rdev->irq.pflip[5]))
7722                                         radeon_crtc_handle_vblank(rdev, 5);
7723                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7724                                 DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7725
7726                                 break;
7727                         case 1: /* D6 vline */
7728                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7729                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7730
7731                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7732                                 DRM_DEBUG_VBLANK("IH: D6 vline\n");
7733
7734                                 break;
7735                         default:
7736                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7737                                 break;
7738                         }
7739                         break;
7740                 case 8: /* D1 page flip */
7741                 case 10: /* D2 page flip */
7742                 case 12: /* D3 page flip */
7743                 case 14: /* D4 page flip */
7744                 case 16: /* D5 page flip */
7745                 case 18: /* D6 page flip */
7746                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7747                         if (radeon_use_pflipirq > 0)
7748                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7749                         break;
7750                 case 42: /* HPD hotplug */
7751                         switch (src_data) {
7752                         case 0:
7753                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7754                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7755
7756                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7757                                 queue_hotplug = true;
7758                                 DRM_DEBUG("IH: HPD1\n");
7759
7760                                 break;
7761                         case 1:
7762                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7763                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7764
7765                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7766                                 queue_hotplug = true;
7767                                 DRM_DEBUG("IH: HPD2\n");
7768
7769                                 break;
7770                         case 2:
7771                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7772                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7773
7774                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7775                                 queue_hotplug = true;
7776                                 DRM_DEBUG("IH: HPD3\n");
7777
7778                                 break;
7779                         case 3:
7780                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7781                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7782
7783                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7784                                 queue_hotplug = true;
7785                                 DRM_DEBUG("IH: HPD4\n");
7786
7787                                 break;
7788                         case 4:
7789                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7790                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791
7792                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7793                                 queue_hotplug = true;
7794                                 DRM_DEBUG("IH: HPD5\n");
7795
7796                                 break;
7797                         case 5:
7798                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7799                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800
7801                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7802                                 queue_hotplug = true;
7803                                 DRM_DEBUG("IH: HPD6\n");
7804
7805                                 break;
7806                         case 6:
7807                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7808                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7811                                 queue_dp = true;
7812                                 DRM_DEBUG("IH: HPD_RX 1\n");
7813
7814                                 break;
7815                         case 7:
7816                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7817                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7820                                 queue_dp = true;
7821                                 DRM_DEBUG("IH: HPD_RX 2\n");
7822
7823                                 break;
7824                         case 8:
7825                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7826                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7829                                 queue_dp = true;
7830                                 DRM_DEBUG("IH: HPD_RX 3\n");
7831
7832                                 break;
7833                         case 9:
7834                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7835                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7838                                 queue_dp = true;
7839                                 DRM_DEBUG("IH: HPD_RX 4\n");
7840
7841                                 break;
7842                         case 10:
7843                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7844                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7847                                 queue_dp = true;
7848                                 DRM_DEBUG("IH: HPD_RX 5\n");
7849
7850                                 break;
7851                         case 11:
7852                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7853                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7856                                 queue_dp = true;
7857                                 DRM_DEBUG("IH: HPD_RX 6\n");
7858
7859                                 break;
7860                         default:
7861                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7862                                 break;
7863                         }
7864                         break;
7865                 case 96:
7866                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7867                         WREG32(SRBM_INT_ACK, 0x1);
7868                         break;
7869                 case 124: /* UVD */
7870                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7871                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7872                         break;
7873                 case 146:
7874                 case 147:
7875                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7876                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7877                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7878                         /* reset addr and status */
7879                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7880                         if (addr == 0x0 && status == 0x0)
7881                                 break;
7882                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7883                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7884                                 addr);
7885                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7886                                 status);
7887                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7888                         break;
7889                 case 167: /* VCE */
7890                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7891                         switch (src_data) {
7892                         case 0:
7893                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7894                                 break;
7895                         case 1:
7896                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7897                                 break;
7898                         default:
7899                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7900                                 break;
7901                         }
7902                         break;
7903                 case 176: /* GFX RB CP_INT */
7904                 case 177: /* GFX IB CP_INT */
7905                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7906                         break;
7907                 case 181: /* CP EOP event */
7908                         DRM_DEBUG("IH: CP EOP\n");
7909                         /* XXX check the bitfield order! */
7910                         me_id = (ring_id & 0x60) >> 5;
7911                         pipe_id = (ring_id & 0x18) >> 3;
7912                         queue_id = (ring_id & 0x7) >> 0;
7913                         switch (me_id) {
7914                         case 0:
7915                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7916                                 break;
7917                         case 1:
7918                         case 2:
7919                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7920                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7921                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7922                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7923                                 break;
7924                         }
7925                         break;
7926                 case 184: /* CP Privileged reg access */
7927                         DRM_ERROR("Illegal register access in command stream\n");
7928                         /* XXX check the bitfield order! */
7929                         me_id = (ring_id & 0x60) >> 5;
7930                         pipe_id = (ring_id & 0x18) >> 3;
7931                         queue_id = (ring_id & 0x7) >> 0;
7932                         switch (me_id) {
7933                         case 0:
7934                                 /* This results in a full GPU reset, but all we need to do is soft
7935                                  * reset the CP for gfx
7936                                  */
7937                                 queue_reset = true;
7938                                 break;
7939                         case 1:
7940                                 /* XXX compute */
7941                                 queue_reset = true;
7942                                 break;
7943                         case 2:
7944                                 /* XXX compute */
7945                                 queue_reset = true;
7946                                 break;
7947                         }
7948                         break;
7949                 case 185: /* CP Privileged inst */
7950                         DRM_ERROR("Illegal instruction in command stream\n");
7951                         /* XXX check the bitfield order! */
7952                         me_id = (ring_id & 0x60) >> 5;
7953                         pipe_id = (ring_id & 0x18) >> 3;
7954                         queue_id = (ring_id & 0x7) >> 0;
7955                         switch (me_id) {
7956                         case 0:
7957                                 /* This results in a full GPU reset, but all we need to do is soft
7958                                  * reset the CP for gfx
7959                                  */
7960                                 queue_reset = true;
7961                                 break;
7962                         case 1:
7963                                 /* XXX compute */
7964                                 queue_reset = true;
7965                                 break;
7966                         case 2:
7967                                 /* XXX compute */
7968                                 queue_reset = true;
7969                                 break;
7970                         }
7971                         break;
7972                 case 224: /* SDMA trap event */
7973                         /* XXX check the bitfield order! */
7974                         me_id = (ring_id & 0x3) >> 0;
7975                         queue_id = (ring_id & 0xc) >> 2;
7976                         DRM_DEBUG("IH: SDMA trap\n");
7977                         switch (me_id) {
7978                         case 0:
7979                                 switch (queue_id) {
7980                                 case 0:
7981                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7982                                         break;
7983                                 case 1:
7984                                         /* XXX compute */
7985                                         break;
7986                                 case 2:
7987                                         /* XXX compute */
7988                                         break;
7989                                 }
7990                                 break;
7991                         case 1:
7992                                 switch (queue_id) {
7993                                 case 0:
7994                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7995                                         break;
7996                                 case 1:
7997                                         /* XXX compute */
7998                                         break;
7999                                 case 2:
8000                                         /* XXX compute */
8001                                         break;
8002                                 }
8003                                 break;
8004                         }
8005                         break;
8006                 case 230: /* thermal low to high */
8007                         DRM_DEBUG("IH: thermal low to high\n");
8008                         rdev->pm.dpm.thermal.high_to_low = false;
8009                         queue_thermal = true;
8010                         break;
8011                 case 231: /* thermal high to low */
8012                         DRM_DEBUG("IH: thermal high to low\n");
8013                         rdev->pm.dpm.thermal.high_to_low = true;
8014                         queue_thermal = true;
8015                         break;
8016                 case 233: /* GUI IDLE */
8017                         DRM_DEBUG("IH: GUI idle\n");
8018                         break;
8019                 case 241: /* SDMA Privileged inst */
8020                 case 247: /* SDMA Privileged inst */
8021                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8022                         /* XXX check the bitfield order! */
8023                         me_id = (ring_id & 0x3) >> 0;
8024                         queue_id = (ring_id & 0xc) >> 2;
8025                         switch (me_id) {
8026                         case 0:
8027                                 switch (queue_id) {
8028                                 case 0:
8029                                         queue_reset = true;
8030                                         break;
8031                                 case 1:
8032                                         /* XXX compute */
8033                                         queue_reset = true;
8034                                         break;
8035                                 case 2:
8036                                         /* XXX compute */
8037                                         queue_reset = true;
8038                                         break;
8039                                 }
8040                                 break;
8041                         case 1:
8042                                 switch (queue_id) {
8043                                 case 0:
8044                                         queue_reset = true;
8045                                         break;
8046                                 case 1:
8047                                         /* XXX compute */
8048                                         queue_reset = true;
8049                                         break;
8050                                 case 2:
8051                                         /* XXX compute */
8052                                         queue_reset = true;
8053                                         break;
8054                                 }
8055                                 break;
8056                         }
8057                         break;
8058                 default:
8059                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8060                         break;
8061                 }
8062
8063                 /* wptr/rptr are in bytes! */
8064                 rptr += 16;
8065                 rptr &= rdev->ih.ptr_mask;
8066                 WREG32(IH_RB_RPTR, rptr);
8067         }
8068         if (queue_dp)
8069                 schedule_work(&rdev->dp_work);
8070         if (queue_hotplug)
8071                 schedule_delayed_work(&rdev->hotplug_work, 0);
8072         if (queue_reset) {
8073                 rdev->needs_reset = true;
8074                 wake_up_all(&rdev->fence_queue);
8075         }
8076         if (queue_thermal)
8077                 schedule_work(&rdev->pm.dpm.thermal.work);
8078         rdev->ih.rptr = rptr;
8079         atomic_set(&rdev->ih.lock, 0);
8080
8081         /* make sure wptr hasn't changed while processing */
8082         wptr = cik_get_ih_wptr(rdev);
8083         if (wptr != rptr)
8084                 goto restart_ih;
8085
8086         return IRQ_HANDLED;
8087 }
8088
8089 /*
8090  * startup/shutdown callbacks
8091  */
8092 static void cik_uvd_init(struct radeon_device *rdev)
8093 {
8094         int r;
8095
8096         if (!rdev->has_uvd)
8097                 return;
8098
8099         r = radeon_uvd_init(rdev);
8100         if (r) {
8101                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8102                 /*
8103                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8104                  * to early fails cik_uvd_start() and thus nothing happens
8105                  * there. So it is pointless to try to go through that code
8106                  * hence why we disable uvd here.
8107                  */
8108                 rdev->has_uvd = 0;
8109                 return;
8110         }
8111         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8112         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8113 }
8114
8115 static void cik_uvd_start(struct radeon_device *rdev)
8116 {
8117         int r;
8118
8119         if (!rdev->has_uvd)
8120                 return;
8121
8122         r = radeon_uvd_resume(rdev);
8123         if (r) {
8124                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8125                 goto error;
8126         }
8127         r = uvd_v4_2_resume(rdev);
8128         if (r) {
8129                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8130                 goto error;
8131         }
8132         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8133         if (r) {
8134                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8135                 goto error;
8136         }
8137         return;
8138
8139 error:
8140         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8141 }
8142
8143 static void cik_uvd_resume(struct radeon_device *rdev)
8144 {
8145         struct radeon_ring *ring;
8146         int r;
8147
8148         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8149                 return;
8150
8151         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8152         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8153         if (r) {
8154                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8155                 return;
8156         }
8157         r = uvd_v1_0_init(rdev);
8158         if (r) {
8159                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8160                 return;
8161         }
8162 }
8163
8164 static void cik_vce_init(struct radeon_device *rdev)
8165 {
8166         int r;
8167
8168         if (!rdev->has_vce)
8169                 return;
8170
8171         r = radeon_vce_init(rdev);
8172         if (r) {
8173                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8174                 /*
8175                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8176                  * to early fails cik_vce_start() and thus nothing happens
8177                  * there. So it is pointless to try to go through that code
8178                  * hence why we disable vce here.
8179                  */
8180                 rdev->has_vce = 0;
8181                 return;
8182         }
8183         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8184         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8185         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8186         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8187 }
8188
8189 static void cik_vce_start(struct radeon_device *rdev)
8190 {
8191         int r;
8192
8193         if (!rdev->has_vce)
8194                 return;
8195
8196         r = radeon_vce_resume(rdev);
8197         if (r) {
8198                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8199                 goto error;
8200         }
8201         r = vce_v2_0_resume(rdev);
8202         if (r) {
8203                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8204                 goto error;
8205         }
8206         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8207         if (r) {
8208                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8209                 goto error;
8210         }
8211         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8212         if (r) {
8213                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8214                 goto error;
8215         }
8216         return;
8217
8218 error:
8219         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8220         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8221 }
8222
8223 static void cik_vce_resume(struct radeon_device *rdev)
8224 {
8225         struct radeon_ring *ring;
8226         int r;
8227
8228         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8229                 return;
8230
8231         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8232         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8233         if (r) {
8234                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8235                 return;
8236         }
8237         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8238         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8239         if (r) {
8240                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8241                 return;
8242         }
8243         r = vce_v1_0_init(rdev);
8244         if (r) {
8245                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8246                 return;
8247         }
8248 }
8249
8250 /**
8251  * cik_startup - program the asic to a functional state
8252  *
8253  * @rdev: radeon_device pointer
8254  *
8255  * Programs the asic to a functional state (CIK).
8256  * Called by cik_init() and cik_resume().
8257  * Returns 0 for success, error for failure.
8258  */
8259 static int cik_startup(struct radeon_device *rdev)
8260 {
8261         struct radeon_ring *ring;
8262         u32 nop;
8263         int r;
8264
8265         /* enable pcie gen2/3 link */
8266         cik_pcie_gen3_enable(rdev);
8267         /* enable aspm */
8268         cik_program_aspm(rdev);
8269
8270         /* scratch needs to be initialized before MC */
8271         r = r600_vram_scratch_init(rdev);
8272         if (r)
8273                 return r;
8274
8275         cik_mc_program(rdev);
8276
8277         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8278                 r = ci_mc_load_microcode(rdev);
8279                 if (r) {
8280                         DRM_ERROR("Failed to load MC firmware!\n");
8281                         return r;
8282                 }
8283         }
8284
8285         r = cik_pcie_gart_enable(rdev);
8286         if (r)
8287                 return r;
8288         cik_gpu_init(rdev);
8289
8290         /* allocate rlc buffers */
8291         if (rdev->flags & RADEON_IS_IGP) {
8292                 if (rdev->family == CHIP_KAVERI) {
8293                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8294                         rdev->rlc.reg_list_size =
8295                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8296                 } else {
8297                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8298                         rdev->rlc.reg_list_size =
8299                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8300                 }
8301         }
8302         rdev->rlc.cs_data = ci_cs_data;
8303         rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8304         rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8305         r = sumo_rlc_init(rdev);
8306         if (r) {
8307                 DRM_ERROR("Failed to init rlc BOs!\n");
8308                 return r;
8309         }
8310
8311         /* allocate wb buffer */
8312         r = radeon_wb_init(rdev);
8313         if (r)
8314                 return r;
8315
8316         /* allocate mec buffers */
8317         r = cik_mec_init(rdev);
8318         if (r) {
8319                 DRM_ERROR("Failed to init MEC BOs!\n");
8320                 return r;
8321         }
8322
8323         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8324         if (r) {
8325                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8326                 return r;
8327         }
8328
8329         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8330         if (r) {
8331                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8332                 return r;
8333         }
8334
8335         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8336         if (r) {
8337                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8338                 return r;
8339         }
8340
8341         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8342         if (r) {
8343                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8344                 return r;
8345         }
8346
8347         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8348         if (r) {
8349                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8350                 return r;
8351         }
8352
8353         cik_uvd_start(rdev);
8354         cik_vce_start(rdev);
8355
8356         /* Enable IRQ */
8357         if (!rdev->irq.installed) {
8358                 r = radeon_irq_kms_init(rdev);
8359                 if (r)
8360                         return r;
8361         }
8362
8363         r = cik_irq_init(rdev);
8364         if (r) {
8365                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8366                 radeon_irq_kms_fini(rdev);
8367                 return r;
8368         }
8369         cik_irq_set(rdev);
8370
8371         if (rdev->family == CHIP_HAWAII) {
8372                 if (rdev->new_fw)
8373                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8374                 else
8375                         nop = RADEON_CP_PACKET2;
8376         } else {
8377                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8378         }
8379
8380         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8381         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8382                              nop);
8383         if (r)
8384                 return r;
8385
8386         /* set up the compute queues */
8387         /* type-2 packets are deprecated on MEC, use type-3 instead */
8388         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8389         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8390                              nop);
8391         if (r)
8392                 return r;
8393         ring->me = 1; /* first MEC */
8394         ring->pipe = 0; /* first pipe */
8395         ring->queue = 0; /* first queue */
8396         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8397
8398         /* type-2 packets are deprecated on MEC, use type-3 instead */
8399         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8400         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8401                              nop);
8402         if (r)
8403                 return r;
8404         /* dGPU only have 1 MEC */
8405         ring->me = 1; /* first MEC */
8406         ring->pipe = 0; /* first pipe */
8407         ring->queue = 1; /* second queue */
8408         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8409
8410         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8411         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8412                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8413         if (r)
8414                 return r;
8415
8416         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8417         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8418                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8419         if (r)
8420                 return r;
8421
8422         r = cik_cp_resume(rdev);
8423         if (r)
8424                 return r;
8425
8426         r = cik_sdma_resume(rdev);
8427         if (r)
8428                 return r;
8429
8430         cik_uvd_resume(rdev);
8431         cik_vce_resume(rdev);
8432
8433         r = radeon_ib_pool_init(rdev);
8434         if (r) {
8435                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8436                 return r;
8437         }
8438
8439         r = radeon_vm_manager_init(rdev);
8440         if (r) {
8441                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8442                 return r;
8443         }
8444
8445         r = radeon_audio_init(rdev);
8446         if (r)
8447                 return r;
8448
8449         return 0;
8450 }
8451
8452 /**
8453  * cik_resume - resume the asic to a functional state
8454  *
8455  * @rdev: radeon_device pointer
8456  *
8457  * Programs the asic to a functional state (CIK).
8458  * Called at resume.
8459  * Returns 0 for success, error for failure.
8460  */
8461 int cik_resume(struct radeon_device *rdev)
8462 {
8463         int r;
8464
8465         /* post card */
8466         atom_asic_init(rdev->mode_info.atom_context);
8467
8468         /* init golden registers */
8469         cik_init_golden_registers(rdev);
8470
8471         if (rdev->pm.pm_method == PM_METHOD_DPM)
8472                 radeon_pm_resume(rdev);
8473
8474         rdev->accel_working = true;
8475         r = cik_startup(rdev);
8476         if (r) {
8477                 DRM_ERROR("cik startup failed on resume\n");
8478                 rdev->accel_working = false;
8479                 return r;
8480         }
8481
8482         return r;
8483
8484 }
8485
8486 /**
8487  * cik_suspend - suspend the asic
8488  *
8489  * @rdev: radeon_device pointer
8490  *
8491  * Bring the chip into a state suitable for suspend (CIK).
8492  * Called at suspend.
8493  * Returns 0 for success.
8494  */
8495 int cik_suspend(struct radeon_device *rdev)
8496 {
8497         radeon_pm_suspend(rdev);
8498         radeon_audio_fini(rdev);
8499         radeon_vm_manager_fini(rdev);
8500         cik_cp_enable(rdev, false);
8501         cik_sdma_enable(rdev, false);
8502         if (rdev->has_uvd) {
8503                 uvd_v1_0_fini(rdev);
8504                 radeon_uvd_suspend(rdev);
8505         }
8506         if (rdev->has_vce)
8507                 radeon_vce_suspend(rdev);
8508         cik_fini_pg(rdev);
8509         cik_fini_cg(rdev);
8510         cik_irq_suspend(rdev);
8511         radeon_wb_disable(rdev);
8512         cik_pcie_gart_disable(rdev);
8513         return 0;
8514 }
8515
8516 /* Plan is to move initialization in that function and use
8517  * helper function so that radeon_device_init pretty much
8518  * do nothing more than calling asic specific function. This
8519  * should also allow to remove a bunch of callback function
8520  * like vram_info.
8521  */
8522 /**
8523  * cik_init - asic specific driver and hw init
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Setup asic specific driver variables and program the hw
8528  * to a functional state (CIK).
8529  * Called at driver startup.
8530  * Returns 0 for success, errors for failure.
8531  */
8532 int cik_init(struct radeon_device *rdev)
8533 {
8534         struct radeon_ring *ring;
8535         int r;
8536
8537         /* Read BIOS */
8538         if (!radeon_get_bios(rdev)) {
8539                 if (ASIC_IS_AVIVO(rdev))
8540                         return -EINVAL;
8541         }
8542         /* Must be an ATOMBIOS */
8543         if (!rdev->is_atom_bios) {
8544                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8545                 return -EINVAL;
8546         }
8547         r = radeon_atombios_init(rdev);
8548         if (r)
8549                 return r;
8550
8551         /* Post card if necessary */
8552         if (!radeon_card_posted(rdev)) {
8553                 if (!rdev->bios) {
8554                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8555                         return -EINVAL;
8556                 }
8557                 DRM_INFO("GPU not posted. posting now...\n");
8558                 atom_asic_init(rdev->mode_info.atom_context);
8559         }
8560         /* init golden registers */
8561         cik_init_golden_registers(rdev);
8562         /* Initialize scratch registers */
8563         cik_scratch_init(rdev);
8564         /* Initialize surface registers */
8565         radeon_surface_init(rdev);
8566         /* Initialize clocks */
8567         radeon_get_clock_info(rdev->ddev);
8568
8569         /* Fence driver */
8570         r = radeon_fence_driver_init(rdev);
8571         if (r)
8572                 return r;
8573
8574         /* initialize memory controller */
8575         r = cik_mc_init(rdev);
8576         if (r)
8577                 return r;
8578         /* Memory manager */
8579         r = radeon_bo_init(rdev);
8580         if (r)
8581                 return r;
8582
8583         if (rdev->flags & RADEON_IS_IGP) {
8584                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8585                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8586                         r = cik_init_microcode(rdev);
8587                         if (r) {
8588                                 DRM_ERROR("Failed to load firmware!\n");
8589                                 return r;
8590                         }
8591                 }
8592         } else {
8593                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8594                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8595                     !rdev->mc_fw) {
8596                         r = cik_init_microcode(rdev);
8597                         if (r) {
8598                                 DRM_ERROR("Failed to load firmware!\n");
8599                                 return r;
8600                         }
8601                 }
8602         }
8603
8604         /* Initialize power management */
8605         radeon_pm_init(rdev);
8606
8607         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8608         ring->ring_obj = NULL;
8609         r600_ring_init(rdev, ring, 1024 * 1024);
8610
8611         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8612         ring->ring_obj = NULL;
8613         r600_ring_init(rdev, ring, 1024 * 1024);
8614         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8615         if (r)
8616                 return r;
8617
8618         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8619         ring->ring_obj = NULL;
8620         r600_ring_init(rdev, ring, 1024 * 1024);
8621         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8622         if (r)
8623                 return r;
8624
8625         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8626         ring->ring_obj = NULL;
8627         r600_ring_init(rdev, ring, 256 * 1024);
8628
8629         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8630         ring->ring_obj = NULL;
8631         r600_ring_init(rdev, ring, 256 * 1024);
8632
8633         cik_uvd_init(rdev);
8634         cik_vce_init(rdev);
8635
8636         rdev->ih.ring_obj = NULL;
8637         r600_ih_ring_init(rdev, 64 * 1024);
8638
8639         r = r600_pcie_gart_init(rdev);
8640         if (r)
8641                 return r;
8642
8643         rdev->accel_working = true;
8644         r = cik_startup(rdev);
8645         if (r) {
8646                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8647                 cik_cp_fini(rdev);
8648                 cik_sdma_fini(rdev);
8649                 cik_irq_fini(rdev);
8650                 sumo_rlc_fini(rdev);
8651                 cik_mec_fini(rdev);
8652                 radeon_wb_fini(rdev);
8653                 radeon_ib_pool_fini(rdev);
8654                 radeon_vm_manager_fini(rdev);
8655                 radeon_irq_kms_fini(rdev);
8656                 cik_pcie_gart_fini(rdev);
8657                 rdev->accel_working = false;
8658         }
8659
8660         /* Don't start up if the MC ucode is missing.
8661          * The default clocks and voltages before the MC ucode
8662          * is loaded are not suffient for advanced operations.
8663          */
8664         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8665                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8666                 return -EINVAL;
8667         }
8668
8669         return 0;
8670 }
8671
8672 /**
8673  * cik_fini - asic specific driver and hw fini
8674  *
8675  * @rdev: radeon_device pointer
8676  *
8677  * Tear down the asic specific driver variables and program the hw
8678  * to an idle state (CIK).
8679  * Called at driver unload.
8680  */
8681 void cik_fini(struct radeon_device *rdev)
8682 {
8683         radeon_pm_fini(rdev);
8684         cik_cp_fini(rdev);
8685         cik_sdma_fini(rdev);
8686         cik_fini_pg(rdev);
8687         cik_fini_cg(rdev);
8688         cik_irq_fini(rdev);
8689         sumo_rlc_fini(rdev);
8690         cik_mec_fini(rdev);
8691         radeon_wb_fini(rdev);
8692         radeon_vm_manager_fini(rdev);
8693         radeon_ib_pool_fini(rdev);
8694         radeon_irq_kms_fini(rdev);
8695         uvd_v1_0_fini(rdev);
8696         radeon_uvd_fini(rdev);
8697         radeon_vce_fini(rdev);
8698         cik_pcie_gart_fini(rdev);
8699         r600_vram_scratch_fini(rdev);
8700         radeon_gem_fini(rdev);
8701         radeon_fence_driver_fini(rdev);
8702         radeon_bo_fini(rdev);
8703         radeon_atombios_fini(rdev);
8704         cik_fini_microcode(rdev);
8705         kfree(rdev->bios);
8706         rdev->bios = NULL;
8707 }
8708
8709 void dce8_program_fmt(struct drm_encoder *encoder)
8710 {
8711         struct drm_device *dev = encoder->dev;
8712         struct radeon_device *rdev = dev->dev_private;
8713         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8714         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8715         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8716         int bpc = 0;
8717         u32 tmp = 0;
8718         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8719
8720         if (connector) {
8721                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8722                 bpc = radeon_get_monitor_bpc(connector);
8723                 dither = radeon_connector->dither;
8724         }
8725
8726         /* LVDS/eDP FMT is set up by atom */
8727         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8728                 return;
8729
8730         /* not needed for analog */
8731         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8732             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8733                 return;
8734
8735         if (bpc == 0)
8736                 return;
8737
8738         switch (bpc) {
8739         case 6:
8740                 if (dither == RADEON_FMT_DITHER_ENABLE)
8741                         /* XXX sort out optimal dither settings */
8742                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8743                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8744                 else
8745                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8746                 break;
8747         case 8:
8748                 if (dither == RADEON_FMT_DITHER_ENABLE)
8749                         /* XXX sort out optimal dither settings */
8750                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8751                                 FMT_RGB_RANDOM_ENABLE |
8752                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8753                 else
8754                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8755                 break;
8756         case 10:
8757                 if (dither == RADEON_FMT_DITHER_ENABLE)
8758                         /* XXX sort out optimal dither settings */
8759                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8760                                 FMT_RGB_RANDOM_ENABLE |
8761                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8762                 else
8763                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8764                 break;
8765         default:
8766                 /* not needed */
8767                 break;
8768         }
8769
8770         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8771 }
8772
8773 /* display watermark setup */
8774 /**
8775  * dce8_line_buffer_adjust - Set up the line buffer
8776  *
8777  * @rdev: radeon_device pointer
8778  * @radeon_crtc: the selected display controller
8779  * @mode: the current display mode on the selected display
8780  * controller
8781  *
8782  * Setup up the line buffer allocation for
8783  * the selected display controller (CIK).
8784  * Returns the line buffer size in pixels.
8785  */
8786 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8787                                    struct radeon_crtc *radeon_crtc,
8788                                    struct drm_display_mode *mode)
8789 {
8790         u32 tmp, buffer_alloc, i;
8791         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8792         /*
8793          * Line Buffer Setup
8794          * There are 6 line buffers, one for each display controllers.
8795          * There are 3 partitions per LB. Select the number of partitions
8796          * to enable based on the display width.  For display widths larger
8797          * than 4096, you need use to use 2 display controllers and combine
8798          * them using the stereo blender.
8799          */
8800         if (radeon_crtc->base.enabled && mode) {
8801                 if (mode->crtc_hdisplay < 1920) {
8802                         tmp = 1;
8803                         buffer_alloc = 2;
8804                 } else if (mode->crtc_hdisplay < 2560) {
8805                         tmp = 2;
8806                         buffer_alloc = 2;
8807                 } else if (mode->crtc_hdisplay < 4096) {
8808                         tmp = 0;
8809                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8810                 } else {
8811                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8812                         tmp = 0;
8813                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8814                 }
8815         } else {
8816                 tmp = 1;
8817                 buffer_alloc = 0;
8818         }
8819
8820         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8821                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8822
8823         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8824                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8825         for (i = 0; i < rdev->usec_timeout; i++) {
8826                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8827                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8828                         break;
8829                 udelay(1);
8830         }
8831
8832         if (radeon_crtc->base.enabled && mode) {
8833                 switch (tmp) {
8834                 case 0:
8835                 default:
8836                         return 4096 * 2;
8837                 case 1:
8838                         return 1920 * 2;
8839                 case 2:
8840                         return 2560 * 2;
8841                 }
8842         }
8843
8844         /* controller not enabled, so no lb used */
8845         return 0;
8846 }
8847
8848 /**
8849  * cik_get_number_of_dram_channels - get the number of dram channels
8850  *
8851  * @rdev: radeon_device pointer
8852  *
8853  * Look up the number of video ram channels (CIK).
8854  * Used for display watermark bandwidth calculations
8855  * Returns the number of dram channels
8856  */
8857 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8858 {
8859         u32 tmp = RREG32(MC_SHARED_CHMAP);
8860
8861         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8862         case 0:
8863         default:
8864                 return 1;
8865         case 1:
8866                 return 2;
8867         case 2:
8868                 return 4;
8869         case 3:
8870                 return 8;
8871         case 4:
8872                 return 3;
8873         case 5:
8874                 return 6;
8875         case 6:
8876                 return 10;
8877         case 7:
8878                 return 12;
8879         case 8:
8880                 return 16;
8881         }
8882 }
8883
/* Input parameters for the dce8_* display watermark calculations below. */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
8899
8900 /**
8901  * dce8_dram_bandwidth - get the dram bandwidth
8902  *
8903  * @wm: watermark calculation data
8904  *
8905  * Calculate the raw dram bandwidth (CIK).
8906  * Used for display watermark bandwidth calculations
8907  * Returns the dram bandwidth in MBytes/s
8908  */
8909 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8910 {
8911         /* Calculate raw DRAM Bandwidth */
8912         fixed20_12 dram_efficiency; /* 0.7 */
8913         fixed20_12 yclk, dram_channels, bandwidth;
8914         fixed20_12 a;
8915
8916         a.full = dfixed_const(1000);
8917         yclk.full = dfixed_const(wm->yclk);
8918         yclk.full = dfixed_div(yclk, a);
8919         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8920         a.full = dfixed_const(10);
8921         dram_efficiency.full = dfixed_const(7);
8922         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8923         bandwidth.full = dfixed_mul(dram_channels, yclk);
8924         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8925
8926         return dfixed_trunc(bandwidth);
8927 }
8928
8929 /**
8930  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8931  *
8932  * @wm: watermark calculation data
8933  *
8934  * Calculate the dram bandwidth used for display (CIK).
8935  * Used for display watermark bandwidth calculations
8936  * Returns the dram bandwidth for display in MBytes/s
8937  */
8938 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8939 {
8940         /* Calculate DRAM Bandwidth and the part allocated to display. */
8941         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8942         fixed20_12 yclk, dram_channels, bandwidth;
8943         fixed20_12 a;
8944
8945         a.full = dfixed_const(1000);
8946         yclk.full = dfixed_const(wm->yclk);
8947         yclk.full = dfixed_div(yclk, a);
8948         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8949         a.full = dfixed_const(10);
8950         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8951         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8952         bandwidth.full = dfixed_mul(dram_channels, yclk);
8953         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8954
8955         return dfixed_trunc(bandwidth);
8956 }
8957
8958 /**
8959  * dce8_data_return_bandwidth - get the data return bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the data return bandwidth used for display (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the data return bandwidth in MBytes/s
8966  */
8967 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8968 {
8969         /* Calculate the display Data return Bandwidth */
8970         fixed20_12 return_efficiency; /* 0.8 */
8971         fixed20_12 sclk, bandwidth;
8972         fixed20_12 a;
8973
8974         a.full = dfixed_const(1000);
8975         sclk.full = dfixed_const(wm->sclk);
8976         sclk.full = dfixed_div(sclk, a);
8977         a.full = dfixed_const(10);
8978         return_efficiency.full = dfixed_const(8);
8979         return_efficiency.full = dfixed_div(return_efficiency, a);
8980         a.full = dfixed_const(32);
8981         bandwidth.full = dfixed_mul(a, sclk);
8982         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8983
8984         return dfixed_trunc(bandwidth);
8985 }
8986
8987 /**
8988  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dmif bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dmif bandwidth in MBytes/s
8995  */
8996 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8997 {
8998         /* Calculate the DMIF Request Bandwidth */
8999         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9000         fixed20_12 disp_clk, bandwidth;
9001         fixed20_12 a, b;
9002
9003         a.full = dfixed_const(1000);
9004         disp_clk.full = dfixed_const(wm->disp_clk);
9005         disp_clk.full = dfixed_div(disp_clk, a);
9006         a.full = dfixed_const(32);
9007         b.full = dfixed_mul(a, disp_clk);
9008
9009         a.full = dfixed_const(10);
9010         disp_clk_request_efficiency.full = dfixed_const(8);
9011         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9012
9013         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9014
9015         return dfixed_trunc(bandwidth);
9016 }
9017
9018 /**
9019  * dce8_available_bandwidth - get the min available bandwidth
9020  *
9021  * @wm: watermark calculation data
9022  *
9023  * Calculate the min available bandwidth used for display (CIK).
9024  * Used for display watermark bandwidth calculations
9025  * Returns the min available bandwidth in MBytes/s
9026  */
9027 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9028 {
9029         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9030         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9031         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9032         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9033
9034         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9035 }
9036
9037 /**
9038  * dce8_average_bandwidth - get the average available bandwidth
9039  *
9040  * @wm: watermark calculation data
9041  *
9042  * Calculate the average available bandwidth used for display (CIK).
9043  * Used for display watermark bandwidth calculations
9044  * Returns the average available bandwidth in MBytes/s
9045  */
9046 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9047 {
9048         /* Calculate the display mode Average Bandwidth
9049          * DisplayMode should contain the source and destination dimensions,
9050          * timing, etc.
9051          */
9052         fixed20_12 bpp;
9053         fixed20_12 line_time;
9054         fixed20_12 src_width;
9055         fixed20_12 bandwidth;
9056         fixed20_12 a;
9057
9058         a.full = dfixed_const(1000);
9059         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9060         line_time.full = dfixed_div(line_time, a);
9061         bpp.full = dfixed_const(wm->bytes_per_pixel);
9062         src_width.full = dfixed_const(wm->src_width);
9063         bandwidth.full = dfixed_mul(src_width, bpp);
9064         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9065         bandwidth.full = dfixed_div(bandwidth, line_time);
9066
9067         return dfixed_trunc(bandwidth);
9068 }
9069
9070 /**
9071  * dce8_latency_watermark - get the latency watermark
9072  *
9073  * @wm: watermark calculation data
9074  *
9075  * Calculate the latency watermark (CIK).
9076  * Used for display watermark bandwidth calculations
9077  * Returns the latency watermark in ns
9078  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
        /* First calculate the latency in ns */
        u32 mc_latency = 2000; /* 2000 ns. */
        u32 available_bandwidth = dce8_available_bandwidth(wm);
        /* time for one 512-byte*8 chunk at the available bandwidth (ns) */
        u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
        /* time for a cursor line pair (128*4 bytes) at that bandwidth (ns) */
        u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
        u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
        /* account for the chunk/cursor traffic of the other active heads */
        u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
                (wm->num_heads * cursor_line_pair_return_time);
        u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
        u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
        u32 tmp, dmif_size = 12288;
        fixed20_12 a, b, c;

        /* nothing to hide if no head is active */
        if (wm->num_heads == 0)
                return 0;

        /* heavy downscaling/interlacing needs up to 4 source lines per
         * destination line, otherwise 2 are enough */
        a.full = dfixed_const(2);
        b.full = dfixed_const(1);
        if ((wm->vsc.full > a.full) ||
            ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
            (wm->vtaps >= 5) ||
            ((wm->vsc.full >= a.full) && wm->interlaced))
                max_src_lines_per_dst_line = 4;
        else
                max_src_lines_per_dst_line = 2;

        /* line buffer fill bandwidth: limited by this head's share of the
         * available bandwidth, the dmif size over the latency window, and
         * what the display clock can consume */
        a.full = dfixed_const(available_bandwidth);
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);
        tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
        tmp = min(dfixed_trunc(a), tmp);

        lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

        /* time (ns) to fill the worst-case number of source lines */
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);
        c.full = dfixed_const(lb_fill_bw);
        b.full = dfixed_div(c, b);
        a.full = dfixed_div(a, b);
        line_fill_time = dfixed_trunc(a);

        /* if the line cannot be filled within the active time, the excess
         * adds to the latency the watermark must cover */
        if (line_fill_time < wm->active_time)
                return latency;
        else
                return latency + (line_fill_time - wm->active_time);

}
9128
9129 /**
9130  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9131  * average and available dram bandwidth
9132  *
9133  * @wm: watermark calculation data
9134  *
9135  * Check if the display average bandwidth fits in the display
9136  * dram bandwidth (CIK).
9137  * Used for display watermark bandwidth calculations
9138  * Returns true if the display fits, false if not.
9139  */
9140 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9141 {
9142         if (dce8_average_bandwidth(wm) <=
9143             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9144                 return true;
9145         else
9146                 return false;
9147 }
9148
9149 /**
9150  * dce8_average_bandwidth_vs_available_bandwidth - check
9151  * average and available bandwidth
9152  *
9153  * @wm: watermark calculation data
9154  *
9155  * Check if the display average bandwidth fits in the display
9156  * available bandwidth (CIK).
9157  * Used for display watermark bandwidth calculations
9158  * Returns true if the display fits, false if not.
9159  */
9160 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9161 {
9162         if (dce8_average_bandwidth(wm) <=
9163             (dce8_available_bandwidth(wm) / wm->num_heads))
9164                 return true;
9165         else
9166                 return false;
9167 }
9168
9169 /**
9170  * dce8_check_latency_hiding - check latency hiding
9171  *
9172  * @wm: watermark calculation data
9173  *
9174  * Check latency hiding (CIK).
9175  * Used for display watermark bandwidth calculations
9176  * Returns true if the display fits, false if not.
9177  */
9178 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9179 {
9180         u32 lb_partitions = wm->lb_size / wm->src_width;
9181         u32 line_time = wm->active_time + wm->blank_time;
9182         u32 latency_tolerant_lines;
9183         u32 latency_hiding;
9184         fixed20_12 a;
9185
9186         a.full = dfixed_const(1);
9187         if (wm->vsc.full > a.full)
9188                 latency_tolerant_lines = 1;
9189         else {
9190                 if (lb_partitions <= (wm->vtaps + 1))
9191                         latency_tolerant_lines = 1;
9192                 else
9193                         latency_tolerant_lines = 2;
9194         }
9195
9196         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9197
9198         if (dce8_latency_watermark(wm) <= latency_hiding)
9199                 return true;
9200         else
9201                 return false;
9202 }
9203
9204 /**
9205  * dce8_program_watermarks - program display watermarks
9206  *
9207  * @rdev: radeon_device pointer
9208  * @radeon_crtc: the selected display controller
9209  * @lb_size: line buffer size
9210  * @num_heads: number of display controllers in use
9211  *
9212  * Calculate and program the display watermarks for the
9213  * selected display controller (CIK).
9214  */
static void dce8_program_watermarks(struct radeon_device *rdev,
                                    struct radeon_crtc *radeon_crtc,
                                    u32 lb_size, u32 num_heads)
{
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
        struct dce8_wm_params wm_low, wm_high;
        u32 active_time;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
        u32 tmp, wm_mask;

        if (radeon_crtc->base.enabled && num_heads && mode) {
                /* active/total line time in ns; line_time is clamped to the
                 * 16-bit field of DPG_PIPE_LATENCY_CONTROL */
                active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
                                            (u32)mode->clock);
                line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
                                          (u32)mode->clock);
                line_time = min(line_time, (u32)65535);

                /* watermark for high clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
                    rdev->pm.dpm_enabled) {
                        /* dpm clocks are in 10 kHz units, convert to kHz */
                        wm_high.yclk =
                                radeon_dpm_get_mclk(rdev, false) * 10;
                        wm_high.sclk =
                                radeon_dpm_get_sclk(rdev, false) * 10;
                } else {
                        wm_high.yclk = rdev->pm.current_mclk * 10;
                        wm_high.sclk = rdev->pm.current_sclk * 10;
                }

                wm_high.disp_clk = mode->clock;
                wm_high.src_width = mode->crtc_hdisplay;
                wm_high.active_time = active_time;
                wm_high.blank_time = line_time - wm_high.active_time;
                wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_high.interlaced = true;
                wm_high.vsc = radeon_crtc->vsc;
                wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_high.vtaps = 2;
                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_high.lb_size = lb_size;
                wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
                wm_high.num_heads = num_heads;

                /* set for high clocks */
                latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
                    !dce8_check_latency_hiding(&wm_high) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }

                /* watermark for low clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
                    rdev->pm.dpm_enabled) {
                        wm_low.yclk =
                                radeon_dpm_get_mclk(rdev, true) * 10;
                        wm_low.sclk =
                                radeon_dpm_get_sclk(rdev, true) * 10;
                } else {
                        wm_low.yclk = rdev->pm.current_mclk * 10;
                        wm_low.sclk = rdev->pm.current_sclk * 10;
                }

                wm_low.disp_clk = mode->clock;
                wm_low.src_width = mode->crtc_hdisplay;
                wm_low.active_time = active_time;
                wm_low.blank_time = line_time - wm_low.active_time;
                wm_low.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_low.interlaced = true;
                wm_low.vsc = radeon_crtc->vsc;
                wm_low.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_low.vtaps = 2;
                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_low.lb_size = lb_size;
                wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
                wm_low.num_heads = num_heads;

                /* set for low clocks */
                latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
                    !dce8_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }

                /* Save number of lines the linebuffer leads before the scanout */
                radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
        }

        /* select wm A */
        wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
        tmp = wm_mask;
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(1);
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* select wm B */
        tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(2);
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

        /* save values for DPM */
        radeon_crtc->line_time = line_time;
        radeon_crtc->wm_high = latency_watermark_a;
        radeon_crtc->wm_low = latency_watermark_b;
}
9342
9343 /**
9344  * dce8_bandwidth_update - program display watermarks
9345  *
9346  * @rdev: radeon_device pointer
9347  *
9348  * Calculate and program the display watermarks and line
9349  * buffer allocation (CIK).
9350  */
9351 void dce8_bandwidth_update(struct radeon_device *rdev)
9352 {
9353         struct drm_display_mode *mode = NULL;
9354         u32 num_heads = 0, lb_size;
9355         int i;
9356
9357         if (!rdev->mode_info.mode_config_initialized)
9358                 return;
9359
9360         radeon_update_display_priority(rdev);
9361
9362         for (i = 0; i < rdev->num_crtc; i++) {
9363                 if (rdev->mode_info.crtcs[i]->base.enabled)
9364                         num_heads++;
9365         }
9366         for (i = 0; i < rdev->num_crtc; i++) {
9367                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9368                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9369                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9370         }
9371 }
9372
9373 /**
9374  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9375  *
9376  * @rdev: radeon_device pointer
9377  *
9378  * Fetches a GPU clock counter snapshot (SI).
9379  * Returns the 64 bit clock counter snapshot.
9380  */
9381 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9382 {
9383         uint64_t clock;
9384
9385         mutex_lock(&rdev->gpu_clock_mutex);
9386         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9387         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9388                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9389         mutex_unlock(&rdev->gpu_clock_mutex);
9390         return clock;
9391 }
9392
9393 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9394                              u32 cntl_reg, u32 status_reg)
9395 {
9396         int r, i;
9397         struct atom_clock_dividers dividers;
9398         uint32_t tmp;
9399
9400         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9401                                            clock, false, &dividers);
9402         if (r)
9403                 return r;
9404
9405         tmp = RREG32_SMC(cntl_reg);
9406         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9407         tmp |= dividers.post_divider;
9408         WREG32_SMC(cntl_reg, tmp);
9409
9410         for (i = 0; i < 100; i++) {
9411                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9412                         break;
9413                 mdelay(10);
9414         }
9415         if (i == 100)
9416                 return -ETIMEDOUT;
9417
9418         return 0;
9419 }
9420
9421 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9422 {
9423         int r = 0;
9424
9425         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9426         if (r)
9427                 return r;
9428
9429         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9430         return r;
9431 }
9432
9433 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9434 {
9435         int r, i;
9436         struct atom_clock_dividers dividers;
9437         u32 tmp;
9438
9439         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9440                                            ecclk, false, &dividers);
9441         if (r)
9442                 return r;
9443
9444         for (i = 0; i < 100; i++) {
9445                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9446                         break;
9447                 mdelay(10);
9448         }
9449         if (i == 100)
9450                 return -ETIMEDOUT;
9451
9452         tmp = RREG32_SMC(CG_ECLK_CNTL);
9453         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9454         tmp |= dividers.post_divider;
9455         WREG32_SMC(CG_ECLK_CNTL, tmp);
9456
9457         for (i = 0; i < 100; i++) {
9458                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9459                         break;
9460                 mdelay(10);
9461         }
9462         if (i == 100)
9463                 return -ETIMEDOUT;
9464
9465         return 0;
9466 }
9467
9468 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9469 {
9470         struct pci_dev *root = rdev->pdev->bus->self;
9471         int bridge_pos, gpu_pos;
9472         u32 speed_cntl, mask, current_data_rate;
9473         int ret, i;
9474         u16 tmp16;
9475
9476 #if 0
9477         if (pci_is_root_bus(rdev->pdev->bus))
9478                 return;
9479 #endif
9480
9481         if (radeon_pcie_gen2 == 0)
9482                 return;
9483
9484         if (rdev->flags & RADEON_IS_IGP)
9485                 return;
9486
9487         if (!(rdev->flags & RADEON_IS_PCIE))
9488                 return;
9489
9490         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9491         if (ret != 0)
9492                 return;
9493
9494         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9495                 return;
9496
9497         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9498         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9499                 LC_CURRENT_DATA_RATE_SHIFT;
9500         if (mask & DRM_PCIE_SPEED_80) {
9501                 if (current_data_rate == 2) {
9502                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9503                         return;
9504                 }
9505                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9506         } else if (mask & DRM_PCIE_SPEED_50) {
9507                 if (current_data_rate == 1) {
9508                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9509                         return;
9510                 }
9511                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9512         }
9513
9514         bridge_pos = pci_pcie_cap(root);
9515         if (!bridge_pos)
9516                 return;
9517
9518         gpu_pos = pci_pcie_cap(rdev->pdev);
9519         if (!gpu_pos)
9520                 return;
9521
9522         if (mask & DRM_PCIE_SPEED_80) {
9523                 /* re-try equalization if gen3 is not already enabled */
9524                 if (current_data_rate != 2) {
9525                         u16 bridge_cfg, gpu_cfg;
9526                         u16 bridge_cfg2, gpu_cfg2;
9527                         u32 max_lw, current_lw, tmp;
9528
9529                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9530                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9531
9532                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9533                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9534
9535                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9536                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9537
9538                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9539                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9540                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9541
9542                         if (current_lw < max_lw) {
9543                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9544                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9545                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9546                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9547                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9548                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9549                                 }
9550                         }
9551
9552                         for (i = 0; i < 10; i++) {
9553                                 /* check status */
9554                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9555                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9556                                         break;
9557
9558                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9559                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9560
9561                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9562                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9563
9564                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9565                                 tmp |= LC_SET_QUIESCE;
9566                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9567
9568                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9569                                 tmp |= LC_REDO_EQ;
9570                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9571
9572                                 mdelay(100);
9573
9574                                 /* linkctl */
9575                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9576                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9577                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9578                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9579
9580                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9581                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9582                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9583                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9584
9585                                 /* linkctl2 */
9586                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9587                                 tmp16 &= ~((1 << 4) | (7 << 9));
9588                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9589                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9590
9591                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9592                                 tmp16 &= ~((1 << 4) | (7 << 9));
9593                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9594                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9595
9596                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9597                                 tmp &= ~LC_SET_QUIESCE;
9598                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9599                         }
9600                 }
9601         }
9602
9603         /* set the link speed */
9604         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9605         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9606         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9607
9608         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9609         tmp16 &= ~0xf;
9610         if (mask & DRM_PCIE_SPEED_80)
9611                 tmp16 |= 3; /* gen3 */
9612         else if (mask & DRM_PCIE_SPEED_50)
9613                 tmp16 |= 2; /* gen2 */
9614         else
9615                 tmp16 |= 1; /* gen1 */
9616         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9617
9618         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9619         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9620         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9621
9622         for (i = 0; i < rdev->usec_timeout; i++) {
9623                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9624                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9625                         break;
9626                 udelay(1);
9627         }
9628 }
9629
/*
 * cik_program_aspm - configure PCIe ASPM (Active State Power Management)
 * for CIK parts.
 *
 * Tunes the link controller for L0s/L1 entry (inactivity timers, N_FTS,
 * PLL powerdown in L1) via read-modify-write of PCIE port and SMC
 * registers.  Every write is guarded by an orig != data check so the
 * hardware is only touched when a value actually changes.
 *
 * No-op when ASPM is disabled via the radeon_aspm knob, on IGPs
 * (unverified there, see XXX below), or on non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Policy flags; all hard-coded false here, i.e. L0s, L1 and
	 * PLL-off-in-L1 are all allowed to be enabled below. */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
#if 0
	/* Only referenced inside the zMN_TODO-gated block below;
	 * compiled out together with it in this port. */
	bool disable_clkreq = false;
#endif

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Force the number of fast training sequences transmitted
	 * on L0s exit to 0x24. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Build the LC_CNTL value: clear both inactivity timers, then
	 * re-enable them (value 7) for whichever of L0s/L1 is allowed.
	 * Note: not written back until one of the branches below. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow
		 * PMI-initiated L1 entry again. */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Program PLL power states for the OFF and TXS2
			 * link states on both PIF blocks (PB0/PB1),
			 * lanes 0 and 1. */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

#ifdef zMN_TODO
			/* Upstream probes the root port's LNKCAP for CLKPM
			 * support; not yet ported (zMN_TODO undefined), so
			 * the #else branch below always takes effect. */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}
#else
			clk_req_support = false;
#endif

			/* Dead in this port: clk_req_support is always false
			 * until the zMN_TODO block above is enabled. */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Switch thermal monitor and misc clocks to
				 * alternate sources so the reference clock
				 * can be gated (CLKREQ#). */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the LC_CNTL value built above
		 * (L1 timer cleared, PMI-to-L1 disabled). */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable memory light sleep for the BIF slave/master/replay
	 * memories. */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If the N_FTS field reads back saturated and the link is
		 * reversed in both directions, drop the L0s inactivity
		 * timer again (effectively disabling L0s entry). */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}