/*
 * drm/radeon: Partial update to Linux 3.12
 * [dragonfly.git] / sys / dev / drm / radeon / si.c
 */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "sid.h"
31 #include "atom.h"
32 #include "si_blit_shaders.h"
33 #include "clearstate_si.h"
34 #include "radeon_ucode.h"
35
36
/*
 * Compatibility shims: map the Linux PCI_EXP_* names used below onto
 * DragonFly's native PCIER_/PCIEM_ symbols, or — where DragonFly has no
 * symbol — the raw PCIe capability register offset / bit value.
 */
37 #define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */
38 #define PCI_EXP_LNKCTL2 48 /* Link Control 2 register offset in the PCIe capability */
39 #define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */
40 #define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */
41 #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Device Status: transactions pending bit */
42 #define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* Link Capabilities: clock power management bit */
43
/*
 * Firmware images requested at load time, one set per SI ASIC family
 * (TAHITI / PITCAIRN / VERDE / OLAND / HAINAN): pfp/me/ce command-processor
 * microcode plus mc, rlc and smc firmware blobs.
 */
44 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
53 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
54 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
56 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
57 MODULE_FIRMWARE("radeon/VERDE_me.bin");
58 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
59 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
60 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
61 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
63 MODULE_FIRMWARE("radeon/OLAND_me.bin");
64 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
65 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
66 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
67 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
68 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
69 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
70 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
71 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
72 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
73 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
74
/* Forward declarations for static helpers defined later in this file. */
75 static void si_pcie_gen3_enable(struct radeon_device *rdev);
76 static void si_program_aspm(struct radeon_device *rdev);
77 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
78                                          bool enable);
79 static void si_fini_pg(struct radeon_device *rdev);
80 static void si_fini_cg(struct radeon_device *rdev);
81 static void si_rlc_stop(struct radeon_device *rdev);
82
/*
 * RLC save/restore register list for VERDE.
 * NOTE(review): entries appear to be pairs of (selector << 16 | reg_offset >> 2)
 * followed by a 0x00000000 placeholder, terminated by a final 0x00000000 —
 * confirm the exact encoding against the RLC setup code that consumes this
 * table. Do not edit values; this is a hardware programming sequence.
 */
83 static const u32 verde_rlc_save_restore_register_list[] =
84 {
85         (0x8000 << 16) | (0x98f4 >> 2),
86         0x00000000,
87         (0x8040 << 16) | (0x98f4 >> 2),
88         0x00000000,
89         (0x8000 << 16) | (0xe80 >> 2),
90         0x00000000,
91         (0x8040 << 16) | (0xe80 >> 2),
92         0x00000000,
93         (0x8000 << 16) | (0x89bc >> 2),
94         0x00000000,
95         (0x8040 << 16) | (0x89bc >> 2),
96         0x00000000,
97         (0x8000 << 16) | (0x8c1c >> 2),
98         0x00000000,
99         (0x8040 << 16) | (0x8c1c >> 2),
100         0x00000000,
101         (0x9c00 << 16) | (0x98f0 >> 2),
102         0x00000000,
103         (0x9c00 << 16) | (0xe7c >> 2),
104         0x00000000,
105         (0x8000 << 16) | (0x9148 >> 2),
106         0x00000000,
107         (0x8040 << 16) | (0x9148 >> 2),
108         0x00000000,
109         (0x9c00 << 16) | (0x9150 >> 2),
110         0x00000000,
111         (0x9c00 << 16) | (0x897c >> 2),
112         0x00000000,
113         (0x9c00 << 16) | (0x8d8c >> 2),
114         0x00000000,
115         (0x9c00 << 16) | (0xac54 >> 2),
        /* NOTE(review): the '0X' capitalization below and the bare 0x3 entry
         * break the pair pattern — verify against the upstream list before
         * assuming they are typos; they may be intentional markers. */
116         0X00000000,
117         0x3,
118         (0x9c00 << 16) | (0x98f8 >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x9910 >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0x9914 >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0x9918 >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0x991c >> 2),
127         0x00000000,
128         (0x9c00 << 16) | (0x9920 >> 2),
129         0x00000000,
130         (0x9c00 << 16) | (0x9924 >> 2),
131         0x00000000,
132         (0x9c00 << 16) | (0x9928 >> 2),
133         0x00000000,
134         (0x9c00 << 16) | (0x992c >> 2),
135         0x00000000,
136         (0x9c00 << 16) | (0x9930 >> 2),
137         0x00000000,
138         (0x9c00 << 16) | (0x9934 >> 2),
139         0x00000000,
140         (0x9c00 << 16) | (0x9938 >> 2),
141         0x00000000,
142         (0x9c00 << 16) | (0x993c >> 2),
143         0x00000000,
144         (0x9c00 << 16) | (0x9940 >> 2),
145         0x00000000,
146         (0x9c00 << 16) | (0x9944 >> 2),
147         0x00000000,
148         (0x9c00 << 16) | (0x9948 >> 2),
149         0x00000000,
150         (0x9c00 << 16) | (0x994c >> 2),
151         0x00000000,
152         (0x9c00 << 16) | (0x9950 >> 2),
153         0x00000000,
154         (0x9c00 << 16) | (0x9954 >> 2),
155         0x00000000,
156         (0x9c00 << 16) | (0x9958 >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x995c >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x9960 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0x9964 >> 2),
163         0x00000000,
164         (0x9c00 << 16) | (0x9968 >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x996c >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9970 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x9974 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9978 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x997c >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9980 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x9984 >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x9988 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x998c >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x8c00 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x8c14 >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x8c04 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x8c08 >> 2),
191         0x00000000,
192         (0x8000 << 16) | (0x9b7c >> 2),
193         0x00000000,
194         (0x8040 << 16) | (0x9b7c >> 2),
195         0x00000000,
196         (0x8000 << 16) | (0xe84 >> 2),
197         0x00000000,
198         (0x8040 << 16) | (0xe84 >> 2),
199         0x00000000,
200         (0x8000 << 16) | (0x89c0 >> 2),
201         0x00000000,
202         (0x8040 << 16) | (0x89c0 >> 2),
203         0x00000000,
204         (0x8000 << 16) | (0x914c >> 2),
205         0x00000000,
206         (0x8040 << 16) | (0x914c >> 2),
207         0x00000000,
208         (0x8000 << 16) | (0x8c20 >> 2),
209         0x00000000,
210         (0x8040 << 16) | (0x8c20 >> 2),
211         0x00000000,
212         (0x8000 << 16) | (0x9354 >> 2),
213         0x00000000,
214         (0x8040 << 16) | (0x9354 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9060 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x9364 >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9100 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x913c >> 2),
223         0x00000000,
224         (0x8000 << 16) | (0x90e0 >> 2),
225         0x00000000,
226         (0x8000 << 16) | (0x90e4 >> 2),
227         0x00000000,
228         (0x8000 << 16) | (0x90e8 >> 2),
229         0x00000000,
230         (0x8040 << 16) | (0x90e0 >> 2),
231         0x00000000,
232         (0x8040 << 16) | (0x90e4 >> 2),
233         0x00000000,
234         (0x8040 << 16) | (0x90e8 >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x8bcc >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x8b24 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x88c4 >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8e50 >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x8c0c >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x8e58 >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x8e5c >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x9508 >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0x950c >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0x9494 >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0xac0c >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0xac10 >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0xac14 >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0xae00 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0xac08 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0x88d4 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x88c8 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x88cc >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x89b0 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x8b10 >> 2),
275         0x00000000,
276         (0x9c00 << 16) | (0x8a14 >> 2),
277         0x00000000,
278         (0x9c00 << 16) | (0x9830 >> 2),
279         0x00000000,
280         (0x9c00 << 16) | (0x9834 >> 2),
281         0x00000000,
282         (0x9c00 << 16) | (0x9838 >> 2),
283         0x00000000,
284         (0x9c00 << 16) | (0x9a10 >> 2),
285         0x00000000,
286         (0x8000 << 16) | (0x9870 >> 2),
287         0x00000000,
288         (0x8000 << 16) | (0x9874 >> 2),
289         0x00000000,
290         (0x8001 << 16) | (0x9870 >> 2),
291         0x00000000,
292         (0x8001 << 16) | (0x9874 >> 2),
293         0x00000000,
294         (0x8040 << 16) | (0x9870 >> 2),
295         0x00000000,
296         (0x8040 << 16) | (0x9874 >> 2),
297         0x00000000,
298         (0x8041 << 16) | (0x9870 >> 2),
299         0x00000000,
300         (0x8041 << 16) | (0x9874 >> 2),
301         0x00000000,
        /* list terminator */
302         0x00000000
303 };
304
/*
 * Golden RLC register settings for TAHITI.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
305 static const u32 tahiti_golden_rlc_registers[] =
306 {
307         0xc424, 0xffffffff, 0x00601005,
308         0xc47c, 0xffffffff, 0x10104040,
309         0xc488, 0xffffffff, 0x0100000a,
310         0xc314, 0xffffffff, 0x00000800,
311         0xc30c, 0xffffffff, 0x800000f4,
312         0xf4a8, 0xffffffff, 0x00000000
313 };
314
/*
 * Golden register settings for TAHITI.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
315 static const u32 tahiti_golden_registers[] =
316 {
317         0x9a10, 0x00010000, 0x00018208,
318         0x9830, 0xffffffff, 0x00000000,
319         0x9834, 0xf00fffff, 0x00000400,
320         0x9838, 0x0002021c, 0x00020200,
321         0xc78, 0x00000080, 0x00000000,
322         0xd030, 0x000300c0, 0x00800040,
323         0xd830, 0x000300c0, 0x00800040,
324         0x5bb0, 0x000000f0, 0x00000070,
325         0x5bc0, 0x00200000, 0x50100000,
326         0x7030, 0x31000311, 0x00000011,
327         0x277c, 0x00000003, 0x000007ff,
328         0x240c, 0x000007ff, 0x00000000,
329         0x8a14, 0xf000001f, 0x00000007,
330         0x8b24, 0xffffffff, 0x00ffffff,
331         0x8b10, 0x0000ff0f, 0x00000000,
332         0x28a4c, 0x07ffffff, 0x4e000000,
333         0x28350, 0x3f3f3fff, 0x2a00126a,
334         0x30, 0x000000ff, 0x0040,
335         0x34, 0x00000040, 0x00004040,
336         0x9100, 0x07ffffff, 0x03000000,
337         0x8e88, 0x01ff1f3f, 0x00000000,
338         0x8e84, 0x01ff1f3f, 0x00000000,
339         0x9060, 0x0000007f, 0x00000020,
340         0x9508, 0x00010000, 0x00010000,
341         0xac14, 0x00000200, 0x000002fb,
342         0xac10, 0xffffffff, 0x0000543b,
343         0xac0c, 0xffffffff, 0xa9210876,
344         0x88d0, 0xffffffff, 0x000fff40,
345         0x88d4, 0x0000001f, 0x00000010,
346         0x1410, 0x20000000, 0x20fffed8,
347         0x15c0, 0x000c0fc0, 0x000c0400
348 };
349
/*
 * Second golden register table for TAHITI — same apparent (offset, mask,
 * value) triplet layout; presumably applied separately from the main table.
 */
350 static const u32 tahiti_golden_registers2[] =
351 {
352         0xc64, 0x00000001, 0x00000001
353 };
354
/*
 * Golden RLC register settings for PITCAIRN.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
355 static const u32 pitcairn_golden_rlc_registers[] =
356 {
357         0xc424, 0xffffffff, 0x00601004,
358         0xc47c, 0xffffffff, 0x10102020,
359         0xc488, 0xffffffff, 0x01000020,
360         0xc314, 0xffffffff, 0x00000800,
361         0xc30c, 0xffffffff, 0x800000a4
362 };
363
/*
 * Golden register settings for PITCAIRN.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
364 static const u32 pitcairn_golden_registers[] =
365 {
366         0x9a10, 0x00010000, 0x00018208,
367         0x9830, 0xffffffff, 0x00000000,
368         0x9834, 0xf00fffff, 0x00000400,
369         0x9838, 0x0002021c, 0x00020200,
370         0xc78, 0x00000080, 0x00000000,
371         0xd030, 0x000300c0, 0x00800040,
372         0xd830, 0x000300c0, 0x00800040,
373         0x5bb0, 0x000000f0, 0x00000070,
374         0x5bc0, 0x00200000, 0x50100000,
375         0x7030, 0x31000311, 0x00000011,
376         0x2ae4, 0x00073ffe, 0x000022a2,
377         0x240c, 0x000007ff, 0x00000000,
378         0x8a14, 0xf000001f, 0x00000007,
379         0x8b24, 0xffffffff, 0x00ffffff,
380         0x8b10, 0x0000ff0f, 0x00000000,
381         0x28a4c, 0x07ffffff, 0x4e000000,
382         0x28350, 0x3f3f3fff, 0x2a00126a,
383         0x30, 0x000000ff, 0x0040,
384         0x34, 0x00000040, 0x00004040,
385         0x9100, 0x07ffffff, 0x03000000,
386         0x9060, 0x0000007f, 0x00000020,
387         0x9508, 0x00010000, 0x00010000,
388         0xac14, 0x000003ff, 0x000000f7,
389         0xac10, 0xffffffff, 0x00000000,
390         0xac0c, 0xffffffff, 0x32761054,
391         0x88d4, 0x0000001f, 0x00000010,
392         0x15c0, 0x000c0fc0, 0x000c0400
393 };
394
/*
 * Golden RLC register settings for VERDE.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
395 static const u32 verde_golden_rlc_registers[] =
396 {
397         0xc424, 0xffffffff, 0x033f1005,
398         0xc47c, 0xffffffff, 0x10808020,
399         0xc488, 0xffffffff, 0x00800008,
400         0xc314, 0xffffffff, 0x00001000,
401         0xc30c, 0xffffffff, 0x80010014
402 };
403
/*
 * Golden register settings for VERDE.
 * NOTE(review): rows look like (reg offset, mask, value) triplets. Several
 * rows are repeated verbatim (0xd030, 0xd830, 0x2ae4, 0x240c, 0x8a14,
 * 0x28350, 0x9100, 0x8e88, 0x8e84, 0xac14, 0xac10, 0xac0c, 0x88d4) — the
 * duplicates are harmless (same value rewritten) but verify against the
 * upstream table before deduping. Do not edit values.
 */
404 static const u32 verde_golden_registers[] =
405 {
406         0x9a10, 0x00010000, 0x00018208,
407         0x9830, 0xffffffff, 0x00000000,
408         0x9834, 0xf00fffff, 0x00000400,
409         0x9838, 0x0002021c, 0x00020200,
410         0xc78, 0x00000080, 0x00000000,
411         0xd030, 0x000300c0, 0x00800040,
412         0xd030, 0x000300c0, 0x00800040,
413         0xd830, 0x000300c0, 0x00800040,
414         0xd830, 0x000300c0, 0x00800040,
415         0x5bb0, 0x000000f0, 0x00000070,
416         0x5bc0, 0x00200000, 0x50100000,
417         0x7030, 0x31000311, 0x00000011,
418         0x2ae4, 0x00073ffe, 0x000022a2,
419         0x2ae4, 0x00073ffe, 0x000022a2,
420         0x2ae4, 0x00073ffe, 0x000022a2,
421         0x240c, 0x000007ff, 0x00000000,
422         0x240c, 0x000007ff, 0x00000000,
423         0x240c, 0x000007ff, 0x00000000,
424         0x8a14, 0xf000001f, 0x00000007,
425         0x8a14, 0xf000001f, 0x00000007,
426         0x8a14, 0xf000001f, 0x00000007,
427         0x8b24, 0xffffffff, 0x00ffffff,
428         0x8b10, 0x0000ff0f, 0x00000000,
429         0x28a4c, 0x07ffffff, 0x4e000000,
430         0x28350, 0x3f3f3fff, 0x0000124a,
431         0x28350, 0x3f3f3fff, 0x0000124a,
432         0x28350, 0x3f3f3fff, 0x0000124a,
433         0x30, 0x000000ff, 0x0040,
434         0x34, 0x00000040, 0x00004040,
435         0x9100, 0x07ffffff, 0x03000000,
436         0x9100, 0x07ffffff, 0x03000000,
437         0x8e88, 0x01ff1f3f, 0x00000000,
438         0x8e88, 0x01ff1f3f, 0x00000000,
439         0x8e88, 0x01ff1f3f, 0x00000000,
440         0x8e84, 0x01ff1f3f, 0x00000000,
441         0x8e84, 0x01ff1f3f, 0x00000000,
442         0x8e84, 0x01ff1f3f, 0x00000000,
443         0x9060, 0x0000007f, 0x00000020,
444         0x9508, 0x00010000, 0x00010000,
445         0xac14, 0x000003ff, 0x00000003,
446         0xac14, 0x000003ff, 0x00000003,
447         0xac14, 0x000003ff, 0x00000003,
448         0xac10, 0xffffffff, 0x00000000,
449         0xac10, 0xffffffff, 0x00000000,
450         0xac10, 0xffffffff, 0x00000000,
451         0xac0c, 0xffffffff, 0x00001032,
452         0xac0c, 0xffffffff, 0x00001032,
453         0xac0c, 0xffffffff, 0x00001032,
454         0x88d4, 0x0000001f, 0x00000010,
455         0x88d4, 0x0000001f, 0x00000010,
456         0x88d4, 0x0000001f, 0x00000010,
457         0x15c0, 0x000c0fc0, 0x000c0400
458 };
459
/*
 * Golden RLC register settings for OLAND.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
460 static const u32 oland_golden_rlc_registers[] =
461 {
462         0xc424, 0xffffffff, 0x00601005,
463         0xc47c, 0xffffffff, 0x10104040,
464         0xc488, 0xffffffff, 0x0100000a,
465         0xc314, 0xffffffff, 0x00000800,
466         0xc30c, 0xffffffff, 0x800000f4
467 };
468
/*
 * Golden register settings for OLAND.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
469 static const u32 oland_golden_registers[] =
470 {
471         0x9a10, 0x00010000, 0x00018208,
472         0x9830, 0xffffffff, 0x00000000,
473         0x9834, 0xf00fffff, 0x00000400,
474         0x9838, 0x0002021c, 0x00020200,
475         0xc78, 0x00000080, 0x00000000,
476         0xd030, 0x000300c0, 0x00800040,
477         0xd830, 0x000300c0, 0x00800040,
478         0x5bb0, 0x000000f0, 0x00000070,
479         0x5bc0, 0x00200000, 0x50100000,
480         0x7030, 0x31000311, 0x00000011,
481         0x2ae4, 0x00073ffe, 0x000022a2,
482         0x240c, 0x000007ff, 0x00000000,
483         0x8a14, 0xf000001f, 0x00000007,
484         0x8b24, 0xffffffff, 0x00ffffff,
485         0x8b10, 0x0000ff0f, 0x00000000,
486         0x28a4c, 0x07ffffff, 0x4e000000,
487         0x28350, 0x3f3f3fff, 0x00000082,
488         0x30, 0x000000ff, 0x0040,
489         0x34, 0x00000040, 0x00004040,
490         0x9100, 0x07ffffff, 0x03000000,
491         0x9060, 0x0000007f, 0x00000020,
492         0x9508, 0x00010000, 0x00010000,
493         0xac14, 0x000003ff, 0x000000f3,
494         0xac10, 0xffffffff, 0x00000000,
495         0xac0c, 0xffffffff, 0x00003210,
496         0x88d4, 0x0000001f, 0x00000010,
497         0x15c0, 0x000c0fc0, 0x000c0400
498 };
499
/*
 * Golden register settings for HAINAN.
 * NOTE(review): rows look like (reg offset, mask, value) triplets — confirm
 * against the golden-register programming helper. Do not edit values.
 */
500 static const u32 hainan_golden_registers[] =
501 {
502         0x9a10, 0x00010000, 0x00018208,
503         0x9830, 0xffffffff, 0x00000000,
504         0x9834, 0xf00fffff, 0x00000400,
505         0x9838, 0x0002021c, 0x00020200,
506         0xd0c0, 0xff000fff, 0x00000100,
507         0xd030, 0x000300c0, 0x00800040,
508         0xd8c0, 0xff000fff, 0x00000100,
509         0xd830, 0x000300c0, 0x00800040,
510         0x2ae4, 0x00073ffe, 0x000022a2,
511         0x240c, 0x000007ff, 0x00000000,
512         0x8a14, 0xf000001f, 0x00000007,
513         0x8b24, 0xffffffff, 0x00ffffff,
514         0x8b10, 0x0000ff0f, 0x00000000,
515         0x28a4c, 0x07ffffff, 0x4e000000,
516         0x28350, 0x3f3f3fff, 0x00000000,
517         0x30, 0x000000ff, 0x0040,
518         0x34, 0x00000040, 0x00004040,
519         0x9100, 0x03e00000, 0x03600000,
520         0x9060, 0x0000007f, 0x00000020,
521         0x9508, 0x00010000, 0x00010000,
522         0xac14, 0x000003ff, 0x000000f1,
523         0xac10, 0xffffffff, 0x00000000,
524         0xac0c, 0xffffffff, 0x00003210,
525         0x88d4, 0x0000001f, 0x00000010,
526         0x15c0, 0x000c0fc0, 0x000c0400
527 };
528
/*
 * Second golden register table for HAINAN — same apparent (offset, mask,
 * value) triplet layout; presumably applied separately from the main table.
 */
529 static const u32 hainan_golden_registers2[] =
530 {
531         0x98f8, 0xffffffff, 0x02010001
532 };
533
/*
 * Medium-grain / coarse-grain clock gating (MGCG/CGCG) init sequence for
 * TAHITI. NOTE(review): rows look like (reg offset, mask, value) triplets —
 * confirm against the register-sequence programming helper. Do not edit
 * values.
 */
534 static const u32 tahiti_mgcg_cgcg_init[] =
535 {
536         0xc400, 0xffffffff, 0xfffffffc,
537         0x802c, 0xffffffff, 0xe0000000,
538         0x9a60, 0xffffffff, 0x00000100,
539         0x92a4, 0xffffffff, 0x00000100,
540         0xc164, 0xffffffff, 0x00000100,
541         0x9774, 0xffffffff, 0x00000100,
542         0x8984, 0xffffffff, 0x06000100,
543         0x8a18, 0xffffffff, 0x00000100,
544         0x92a0, 0xffffffff, 0x00000100,
545         0xc380, 0xffffffff, 0x00000100,
546         0x8b28, 0xffffffff, 0x00000100,
547         0x9144, 0xffffffff, 0x00000100,
548         0x8d88, 0xffffffff, 0x00000100,
549         0x8d8c, 0xffffffff, 0x00000100,
550         0x9030, 0xffffffff, 0x00000100,
551         0x9034, 0xffffffff, 0x00000100,
552         0x9038, 0xffffffff, 0x00000100,
553         0x903c, 0xffffffff, 0x00000100,
554         0xad80, 0xffffffff, 0x00000100,
555         0xac54, 0xffffffff, 0x00000100,
556         0x897c, 0xffffffff, 0x06000100,
557         0x9868, 0xffffffff, 0x00000100,
558         0x9510, 0xffffffff, 0x00000100,
559         0xaf04, 0xffffffff, 0x00000100,
560         0xae04, 0xffffffff, 0x00000100,
561         0x949c, 0xffffffff, 0x00000100,
562         0x802c, 0xffffffff, 0xe0000000,
563         0x9160, 0xffffffff, 0x00010000,
564         0x9164, 0xffffffff, 0x00030002,
565         0x9168, 0xffffffff, 0x00040007,
566         0x916c, 0xffffffff, 0x00060005,
567         0x9170, 0xffffffff, 0x00090008,
568         0x9174, 0xffffffff, 0x00020001,
569         0x9178, 0xffffffff, 0x00040003,
570         0x917c, 0xffffffff, 0x00000007,
571         0x9180, 0xffffffff, 0x00060005,
572         0x9184, 0xffffffff, 0x00090008,
573         0x9188, 0xffffffff, 0x00030002,
574         0x918c, 0xffffffff, 0x00050004,
575         0x9190, 0xffffffff, 0x00000008,
576         0x9194, 0xffffffff, 0x00070006,
577         0x9198, 0xffffffff, 0x000a0009,
578         0x919c, 0xffffffff, 0x00040003,
579         0x91a0, 0xffffffff, 0x00060005,
580         0x91a4, 0xffffffff, 0x00000009,
581         0x91a8, 0xffffffff, 0x00080007,
582         0x91ac, 0xffffffff, 0x000b000a,
583         0x91b0, 0xffffffff, 0x00050004,
584         0x91b4, 0xffffffff, 0x00070006,
585         0x91b8, 0xffffffff, 0x0008000b,
586         0x91bc, 0xffffffff, 0x000a0009,
587         0x91c0, 0xffffffff, 0x000d000c,
588         0x91c4, 0xffffffff, 0x00060005,
589         0x91c8, 0xffffffff, 0x00080007,
590         0x91cc, 0xffffffff, 0x0000000b,
591         0x91d0, 0xffffffff, 0x000a0009,
592         0x91d4, 0xffffffff, 0x000d000c,
593         0x91d8, 0xffffffff, 0x00070006,
594         0x91dc, 0xffffffff, 0x00090008,
595         0x91e0, 0xffffffff, 0x0000000c,
596         0x91e4, 0xffffffff, 0x000b000a,
597         0x91e8, 0xffffffff, 0x000e000d,
598         0x91ec, 0xffffffff, 0x00080007,
599         0x91f0, 0xffffffff, 0x000a0009,
600         0x91f4, 0xffffffff, 0x0000000d,
601         0x91f8, 0xffffffff, 0x000c000b,
602         0x91fc, 0xffffffff, 0x000f000e,
603         0x9200, 0xffffffff, 0x00090008,
604         0x9204, 0xffffffff, 0x000b000a,
605         0x9208, 0xffffffff, 0x000c000f,
606         0x920c, 0xffffffff, 0x000e000d,
607         0x9210, 0xffffffff, 0x00110010,
608         0x9214, 0xffffffff, 0x000a0009,
609         0x9218, 0xffffffff, 0x000c000b,
610         0x921c, 0xffffffff, 0x0000000f,
611         0x9220, 0xffffffff, 0x000e000d,
612         0x9224, 0xffffffff, 0x00110010,
613         0x9228, 0xffffffff, 0x000b000a,
614         0x922c, 0xffffffff, 0x000d000c,
615         0x9230, 0xffffffff, 0x00000010,
616         0x9234, 0xffffffff, 0x000f000e,
617         0x9238, 0xffffffff, 0x00120011,
618         0x923c, 0xffffffff, 0x000c000b,
619         0x9240, 0xffffffff, 0x000e000d,
620         0x9244, 0xffffffff, 0x00000011,
621         0x9248, 0xffffffff, 0x0010000f,
622         0x924c, 0xffffffff, 0x00130012,
623         0x9250, 0xffffffff, 0x000d000c,
624         0x9254, 0xffffffff, 0x000f000e,
625         0x9258, 0xffffffff, 0x00100013,
626         0x925c, 0xffffffff, 0x00120011,
627         0x9260, 0xffffffff, 0x00150014,
628         0x9264, 0xffffffff, 0x000e000d,
629         0x9268, 0xffffffff, 0x0010000f,
630         0x926c, 0xffffffff, 0x00000013,
631         0x9270, 0xffffffff, 0x00120011,
632         0x9274, 0xffffffff, 0x00150014,
633         0x9278, 0xffffffff, 0x000f000e,
634         0x927c, 0xffffffff, 0x00110010,
635         0x9280, 0xffffffff, 0x00000014,
636         0x9284, 0xffffffff, 0x00130012,
637         0x9288, 0xffffffff, 0x00160015,
638         0x928c, 0xffffffff, 0x0010000f,
639         0x9290, 0xffffffff, 0x00120011,
640         0x9294, 0xffffffff, 0x00000015,
641         0x9298, 0xffffffff, 0x00140013,
642         0x929c, 0xffffffff, 0x00170016,
643         0x9150, 0xffffffff, 0x96940200,
644         0x8708, 0xffffffff, 0x00900100,
645         0xc478, 0xffffffff, 0x00000080,
646         0xc404, 0xffffffff, 0x0020003f,
647         0x30, 0xffffffff, 0x0000001c,
648         0x34, 0x000f0000, 0x000f0000,
649         0x160c, 0xffffffff, 0x00000100,
650         0x1024, 0xffffffff, 0x00000100,
651         0x102c, 0x00000101, 0x00000000,
652         0x20a8, 0xffffffff, 0x00000104,
653         0x264c, 0x000c0000, 0x000c0000,
654         0x2648, 0x000c0000, 0x000c0000,
655         0x55e4, 0xff000fff, 0x00000100,
656         0x55e8, 0x00000001, 0x00000001,
657         0x2f50, 0x00000001, 0x00000001,
658         0x30cc, 0xc0000fff, 0x00000104,
659         0xc1e4, 0x00000001, 0x00000001,
660         0xd0c0, 0xfffffff0, 0x00000100,
661         0xd8c0, 0xfffffff0, 0x00000100
662 };
663
/*
 * Medium-grain / coarse-grain clock gating (MGCG/CGCG) init sequence for
 * PITCAIRN. NOTE(review): rows look like (reg offset, mask, value) triplets —
 * confirm against the register-sequence programming helper. Do not edit
 * values.
 */
664 static const u32 pitcairn_mgcg_cgcg_init[] =
665 {
666         0xc400, 0xffffffff, 0xfffffffc,
667         0x802c, 0xffffffff, 0xe0000000,
668         0x9a60, 0xffffffff, 0x00000100,
669         0x92a4, 0xffffffff, 0x00000100,
670         0xc164, 0xffffffff, 0x00000100,
671         0x9774, 0xffffffff, 0x00000100,
672         0x8984, 0xffffffff, 0x06000100,
673         0x8a18, 0xffffffff, 0x00000100,
674         0x92a0, 0xffffffff, 0x00000100,
675         0xc380, 0xffffffff, 0x00000100,
676         0x8b28, 0xffffffff, 0x00000100,
677         0x9144, 0xffffffff, 0x00000100,
678         0x8d88, 0xffffffff, 0x00000100,
679         0x8d8c, 0xffffffff, 0x00000100,
680         0x9030, 0xffffffff, 0x00000100,
681         0x9034, 0xffffffff, 0x00000100,
682         0x9038, 0xffffffff, 0x00000100,
683         0x903c, 0xffffffff, 0x00000100,
684         0xad80, 0xffffffff, 0x00000100,
685         0xac54, 0xffffffff, 0x00000100,
686         0x897c, 0xffffffff, 0x06000100,
687         0x9868, 0xffffffff, 0x00000100,
688         0x9510, 0xffffffff, 0x00000100,
689         0xaf04, 0xffffffff, 0x00000100,
690         0xae04, 0xffffffff, 0x00000100,
691         0x949c, 0xffffffff, 0x00000100,
692         0x802c, 0xffffffff, 0xe0000000,
693         0x9160, 0xffffffff, 0x00010000,
694         0x9164, 0xffffffff, 0x00030002,
695         0x9168, 0xffffffff, 0x00040007,
696         0x916c, 0xffffffff, 0x00060005,
697         0x9170, 0xffffffff, 0x00090008,
698         0x9174, 0xffffffff, 0x00020001,
699         0x9178, 0xffffffff, 0x00040003,
700         0x917c, 0xffffffff, 0x00000007,
701         0x9180, 0xffffffff, 0x00060005,
702         0x9184, 0xffffffff, 0x00090008,
703         0x9188, 0xffffffff, 0x00030002,
704         0x918c, 0xffffffff, 0x00050004,
705         0x9190, 0xffffffff, 0x00000008,
706         0x9194, 0xffffffff, 0x00070006,
707         0x9198, 0xffffffff, 0x000a0009,
708         0x919c, 0xffffffff, 0x00040003,
709         0x91a0, 0xffffffff, 0x00060005,
710         0x91a4, 0xffffffff, 0x00000009,
711         0x91a8, 0xffffffff, 0x00080007,
712         0x91ac, 0xffffffff, 0x000b000a,
713         0x91b0, 0xffffffff, 0x00050004,
714         0x91b4, 0xffffffff, 0x00070006,
715         0x91b8, 0xffffffff, 0x0008000b,
716         0x91bc, 0xffffffff, 0x000a0009,
717         0x91c0, 0xffffffff, 0x000d000c,
718         0x9200, 0xffffffff, 0x00090008,
719         0x9204, 0xffffffff, 0x000b000a,
720         0x9208, 0xffffffff, 0x000c000f,
721         0x920c, 0xffffffff, 0x000e000d,
722         0x9210, 0xffffffff, 0x00110010,
723         0x9214, 0xffffffff, 0x000a0009,
724         0x9218, 0xffffffff, 0x000c000b,
725         0x921c, 0xffffffff, 0x0000000f,
726         0x9220, 0xffffffff, 0x000e000d,
727         0x9224, 0xffffffff, 0x00110010,
728         0x9228, 0xffffffff, 0x000b000a,
729         0x922c, 0xffffffff, 0x000d000c,
730         0x9230, 0xffffffff, 0x00000010,
731         0x9234, 0xffffffff, 0x000f000e,
732         0x9238, 0xffffffff, 0x00120011,
733         0x923c, 0xffffffff, 0x000c000b,
734         0x9240, 0xffffffff, 0x000e000d,
735         0x9244, 0xffffffff, 0x00000011,
736         0x9248, 0xffffffff, 0x0010000f,
737         0x924c, 0xffffffff, 0x00130012,
738         0x9250, 0xffffffff, 0x000d000c,
739         0x9254, 0xffffffff, 0x000f000e,
740         0x9258, 0xffffffff, 0x00100013,
741         0x925c, 0xffffffff, 0x00120011,
742         0x9260, 0xffffffff, 0x00150014,
743         0x9150, 0xffffffff, 0x96940200,
744         0x8708, 0xffffffff, 0x00900100,
745         0xc478, 0xffffffff, 0x00000080,
746         0xc404, 0xffffffff, 0x0020003f,
747         0x30, 0xffffffff, 0x0000001c,
748         0x34, 0x000f0000, 0x000f0000,
749         0x160c, 0xffffffff, 0x00000100,
750         0x1024, 0xffffffff, 0x00000100,
751         0x102c, 0x00000101, 0x00000000,
752         0x20a8, 0xffffffff, 0x00000104,
753         0x55e4, 0xff000fff, 0x00000100,
754         0x55e8, 0x00000001, 0x00000001,
755         0x2f50, 0x00000001, 0x00000001,
756         0x30cc, 0xc0000fff, 0x00000104,
757         0xc1e4, 0x00000001, 0x00000001,
758         0xd0c0, 0xfffffff0, 0x00000100,
759         0xd8c0, 0xfffffff0, 0x00000100
760 };
761
/*
 * Medium-grain / coarse-grain clock gating (MGCG/CGCG) init sequence for
 * VERDE. NOTE(review): rows look like (reg offset, mask, value) triplets —
 * confirm against the register-sequence programming helper. Do not edit
 * values.
 */
762 static const u32 verde_mgcg_cgcg_init[] =
763 {
764         0xc400, 0xffffffff, 0xfffffffc,
765         0x802c, 0xffffffff, 0xe0000000,
766         0x9a60, 0xffffffff, 0x00000100,
767         0x92a4, 0xffffffff, 0x00000100,
768         0xc164, 0xffffffff, 0x00000100,
769         0x9774, 0xffffffff, 0x00000100,
770         0x8984, 0xffffffff, 0x06000100,
771         0x8a18, 0xffffffff, 0x00000100,
772         0x92a0, 0xffffffff, 0x00000100,
773         0xc380, 0xffffffff, 0x00000100,
774         0x8b28, 0xffffffff, 0x00000100,
775         0x9144, 0xffffffff, 0x00000100,
776         0x8d88, 0xffffffff, 0x00000100,
777         0x8d8c, 0xffffffff, 0x00000100,
778         0x9030, 0xffffffff, 0x00000100,
779         0x9034, 0xffffffff, 0x00000100,
780         0x9038, 0xffffffff, 0x00000100,
781         0x903c, 0xffffffff, 0x00000100,
782         0xad80, 0xffffffff, 0x00000100,
783         0xac54, 0xffffffff, 0x00000100,
784         0x897c, 0xffffffff, 0x06000100,
785         0x9868, 0xffffffff, 0x00000100,
786         0x9510, 0xffffffff, 0x00000100,
787         0xaf04, 0xffffffff, 0x00000100,
788         0xae04, 0xffffffff, 0x00000100,
789         0x949c, 0xffffffff, 0x00000100,
790         0x802c, 0xffffffff, 0xe0000000,
791         0x9160, 0xffffffff, 0x00010000,
792         0x9164, 0xffffffff, 0x00030002,
793         0x9168, 0xffffffff, 0x00040007,
794         0x916c, 0xffffffff, 0x00060005,
795         0x9170, 0xffffffff, 0x00090008,
796         0x9174, 0xffffffff, 0x00020001,
797         0x9178, 0xffffffff, 0x00040003,
798         0x917c, 0xffffffff, 0x00000007,
799         0x9180, 0xffffffff, 0x00060005,
800         0x9184, 0xffffffff, 0x00090008,
801         0x9188, 0xffffffff, 0x00030002,
802         0x918c, 0xffffffff, 0x00050004,
803         0x9190, 0xffffffff, 0x00000008,
804         0x9194, 0xffffffff, 0x00070006,
805         0x9198, 0xffffffff, 0x000a0009,
806         0x919c, 0xffffffff, 0x00040003,
807         0x91a0, 0xffffffff, 0x00060005,
808         0x91a4, 0xffffffff, 0x00000009,
809         0x91a8, 0xffffffff, 0x00080007,
810         0x91ac, 0xffffffff, 0x000b000a,
811         0x91b0, 0xffffffff, 0x00050004,
812         0x91b4, 0xffffffff, 0x00070006,
813         0x91b8, 0xffffffff, 0x0008000b,
814         0x91bc, 0xffffffff, 0x000a0009,
815         0x91c0, 0xffffffff, 0x000d000c,
816         0x9200, 0xffffffff, 0x00090008,
817         0x9204, 0xffffffff, 0x000b000a,
818         0x9208, 0xffffffff, 0x000c000f,
819         0x920c, 0xffffffff, 0x000e000d,
820         0x9210, 0xffffffff, 0x00110010,
821         0x9214, 0xffffffff, 0x000a0009,
822         0x9218, 0xffffffff, 0x000c000b,
823         0x921c, 0xffffffff, 0x0000000f,
824         0x9220, 0xffffffff, 0x000e000d,
825         0x9224, 0xffffffff, 0x00110010,
826         0x9228, 0xffffffff, 0x000b000a,
827         0x922c, 0xffffffff, 0x000d000c,
828         0x9230, 0xffffffff, 0x00000010,
829         0x9234, 0xffffffff, 0x000f000e,
830         0x9238, 0xffffffff, 0x00120011,
831         0x923c, 0xffffffff, 0x000c000b,
832         0x9240, 0xffffffff, 0x000e000d,
833         0x9244, 0xffffffff, 0x00000011,
834         0x9248, 0xffffffff, 0x0010000f,
835         0x924c, 0xffffffff, 0x00130012,
836         0x9250, 0xffffffff, 0x000d000c,
837         0x9254, 0xffffffff, 0x000f000e,
838         0x9258, 0xffffffff, 0x00100013,
839         0x925c, 0xffffffff, 0x00120011,
840         0x9260, 0xffffffff, 0x00150014,
841         0x9150, 0xffffffff, 0x96940200,
842         0x8708, 0xffffffff, 0x00900100,
843         0xc478, 0xffffffff, 0x00000080,
844         0xc404, 0xffffffff, 0x0020003f,
845         0x30, 0xffffffff, 0x0000001c,
846         0x34, 0x000f0000, 0x000f0000,
847         0x160c, 0xffffffff, 0x00000100,
848         0x1024, 0xffffffff, 0x00000100,
849         0x102c, 0x00000101, 0x00000000,
850         0x20a8, 0xffffffff, 0x00000104,
851         0x264c, 0x000c0000, 0x000c0000,
852         0x2648, 0x000c0000, 0x000c0000,
853         0x55e4, 0xff000fff, 0x00000100,
854         0x55e8, 0x00000001, 0x00000001,
855         0x2f50, 0x00000001, 0x00000001,
856         0x30cc, 0xc0000fff, 0x00000104,
857         0xc1e4, 0x00000001, 0x00000001,
858         0xd0c0, 0xfffffff0, 0x00000100,
859         0xd8c0, 0xfffffff0, 0x00000100
860 };
861
/* Oland MGCG/CGCG (clockgating) init sequence, consumed three u32s at a
 * time — apparently { register offset, mask, value } triplets — by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
941
/* Hainan MGCG/CGCG (clockgating) init sequence, consumed three u32s at a
 * time — apparently { register offset, mask, value } triplets — by
 * radeon_program_register_sequence() from si_init_golden_registers().
 * Slightly shorter than the Oland table (no 0x102c / 0x55e4 / 0x55e8
 * entries).
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1018
1019 static u32 verde_pg_init[] =
1020 {
1021         0x353c, 0xffffffff, 0x40000,
1022         0x3538, 0xffffffff, 0x200010ff,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x0,
1026         0x353c, 0xffffffff, 0x0,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x7007,
1029         0x3538, 0xffffffff, 0x300010ff,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x400000,
1036         0x3538, 0xffffffff, 0x100010ff,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x120200,
1043         0x3538, 0xffffffff, 0x500010ff,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x1e1e16,
1050         0x3538, 0xffffffff, 0x600010ff,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x171f1e,
1057         0x3538, 0xffffffff, 0x700010ff,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x3538, 0xffffffff, 0x9ff,
1065         0x3500, 0xffffffff, 0x0,
1066         0x3504, 0xffffffff, 0x10000800,
1067         0x3504, 0xffffffff, 0xf,
1068         0x3504, 0xffffffff, 0xf,
1069         0x3500, 0xffffffff, 0x4,
1070         0x3504, 0xffffffff, 0x1000051e,
1071         0x3504, 0xffffffff, 0xffff,
1072         0x3504, 0xffffffff, 0xffff,
1073         0x3500, 0xffffffff, 0x8,
1074         0x3504, 0xffffffff, 0x80500,
1075         0x3500, 0xffffffff, 0x12,
1076         0x3504, 0xffffffff, 0x9050c,
1077         0x3500, 0xffffffff, 0x1d,
1078         0x3504, 0xffffffff, 0xb052c,
1079         0x3500, 0xffffffff, 0x2a,
1080         0x3504, 0xffffffff, 0x1053e,
1081         0x3500, 0xffffffff, 0x2d,
1082         0x3504, 0xffffffff, 0x10546,
1083         0x3500, 0xffffffff, 0x30,
1084         0x3504, 0xffffffff, 0xa054e,
1085         0x3500, 0xffffffff, 0x3c,
1086         0x3504, 0xffffffff, 0x1055f,
1087         0x3500, 0xffffffff, 0x3f,
1088         0x3504, 0xffffffff, 0x10567,
1089         0x3500, 0xffffffff, 0x42,
1090         0x3504, 0xffffffff, 0x1056f,
1091         0x3500, 0xffffffff, 0x45,
1092         0x3504, 0xffffffff, 0x10572,
1093         0x3500, 0xffffffff, 0x48,
1094         0x3504, 0xffffffff, 0x20575,
1095         0x3500, 0xffffffff, 0x4c,
1096         0x3504, 0xffffffff, 0x190801,
1097         0x3500, 0xffffffff, 0x67,
1098         0x3504, 0xffffffff, 0x1082a,
1099         0x3500, 0xffffffff, 0x6a,
1100         0x3504, 0xffffffff, 0x1b082d,
1101         0x3500, 0xffffffff, 0x87,
1102         0x3504, 0xffffffff, 0x310851,
1103         0x3500, 0xffffffff, 0xba,
1104         0x3504, 0xffffffff, 0x891,
1105         0x3500, 0xffffffff, 0xbc,
1106         0x3504, 0xffffffff, 0x893,
1107         0x3500, 0xffffffff, 0xbe,
1108         0x3504, 0xffffffff, 0x20895,
1109         0x3500, 0xffffffff, 0xc2,
1110         0x3504, 0xffffffff, 0x20899,
1111         0x3500, 0xffffffff, 0xc6,
1112         0x3504, 0xffffffff, 0x2089d,
1113         0x3500, 0xffffffff, 0xca,
1114         0x3504, 0xffffffff, 0x8a1,
1115         0x3500, 0xffffffff, 0xcc,
1116         0x3504, 0xffffffff, 0x8a3,
1117         0x3500, 0xffffffff, 0xce,
1118         0x3504, 0xffffffff, 0x308a5,
1119         0x3500, 0xffffffff, 0xd3,
1120         0x3504, 0xffffffff, 0x6d08cd,
1121         0x3500, 0xffffffff, 0x142,
1122         0x3504, 0xffffffff, 0x2000095a,
1123         0x3504, 0xffffffff, 0x1,
1124         0x3500, 0xffffffff, 0x144,
1125         0x3504, 0xffffffff, 0x301f095b,
1126         0x3500, 0xffffffff, 0x165,
1127         0x3504, 0xffffffff, 0xc094d,
1128         0x3500, 0xffffffff, 0x173,
1129         0x3504, 0xffffffff, 0xf096d,
1130         0x3500, 0xffffffff, 0x184,
1131         0x3504, 0xffffffff, 0x15097f,
1132         0x3500, 0xffffffff, 0x19b,
1133         0x3504, 0xffffffff, 0xc0998,
1134         0x3500, 0xffffffff, 0x1a9,
1135         0x3504, 0xffffffff, 0x409a7,
1136         0x3500, 0xffffffff, 0x1af,
1137         0x3504, 0xffffffff, 0xcdc,
1138         0x3500, 0xffffffff, 0x1b1,
1139         0x3504, 0xffffffff, 0x800,
1140         0x3508, 0xffffffff, 0x6c9b2000,
1141         0x3510, 0xfc00, 0x2000,
1142         0x3544, 0xffffffff, 0xfc0,
1143         0x28d4, 0x00000100, 0x100
1144 };
1145
1146 static void si_init_golden_registers(struct radeon_device *rdev)
1147 {
1148         switch (rdev->family) {
1149         case CHIP_TAHITI:
1150                 radeon_program_register_sequence(rdev,
1151                                                  tahiti_golden_registers,
1152                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1153                 radeon_program_register_sequence(rdev,
1154                                                  tahiti_golden_rlc_registers,
1155                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1156                 radeon_program_register_sequence(rdev,
1157                                                  tahiti_mgcg_cgcg_init,
1158                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1159                 radeon_program_register_sequence(rdev,
1160                                                  tahiti_golden_registers2,
1161                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1162                 break;
1163         case CHIP_PITCAIRN:
1164                 radeon_program_register_sequence(rdev,
1165                                                  pitcairn_golden_registers,
1166                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1167                 radeon_program_register_sequence(rdev,
1168                                                  pitcairn_golden_rlc_registers,
1169                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1170                 radeon_program_register_sequence(rdev,
1171                                                  pitcairn_mgcg_cgcg_init,
1172                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1173                 break;
1174         case CHIP_VERDE:
1175                 radeon_program_register_sequence(rdev,
1176                                                  verde_golden_registers,
1177                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1178                 radeon_program_register_sequence(rdev,
1179                                                  verde_golden_rlc_registers,
1180                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1181                 radeon_program_register_sequence(rdev,
1182                                                  verde_mgcg_cgcg_init,
1183                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1184                 radeon_program_register_sequence(rdev,
1185                                                  verde_pg_init,
1186                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1187                 break;
1188         case CHIP_OLAND:
1189                 radeon_program_register_sequence(rdev,
1190                                                  oland_golden_registers,
1191                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1192                 radeon_program_register_sequence(rdev,
1193                                                  oland_golden_rlc_registers,
1194                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1195                 radeon_program_register_sequence(rdev,
1196                                                  oland_mgcg_cgcg_init,
1197                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1198                 break;
1199         case CHIP_HAINAN:
1200                 radeon_program_register_sequence(rdev,
1201                                                  hainan_golden_registers,
1202                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1203                 radeon_program_register_sequence(rdev,
1204                                                  hainan_golden_registers2,
1205                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1206                 radeon_program_register_sequence(rdev,
1207                                                  hainan_mgcg_cgcg_init,
1208                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1209                 break;
1210         default:
1211                 break;
1212         }
1213 }
1214
1215 #define PCIE_BUS_CLK                10000
1216 #define TCLK                        (PCIE_BUS_CLK / 10)
1217
1218 /**
1219  * si_get_xclk - get the xclk
1220  *
1221  * @rdev: radeon_device pointer
1222  *
1223  * Returns the reference clock used by the gfx engine
1224  * (SI).
1225  */
1226 u32 si_get_xclk(struct radeon_device *rdev)
1227 {
1228         u32 reference_clock = rdev->clock.spll.reference_freq;
1229         u32 tmp;
1230
1231         tmp = RREG32(CG_CLKPIN_CNTL_2);
1232         if (tmp & MUX_TCLK_TO_XCLK)
1233                 return TCLK;
1234
1235         tmp = RREG32(CG_CLKPIN_CNTL);
1236         if (tmp & XTALIN_DIVIDE)
1237                 return reference_clock / 4;
1238
1239         return reference_clock;
1240 }
1241
1242 /* get temperature in millidegrees */
1243 int si_get_temp(struct radeon_device *rdev)
1244 {
1245         u32 temp;
1246         int actual_temp = 0;
1247
1248         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1249                 CTF_TEMP_SHIFT;
1250
1251         if (temp & 0x200)
1252                 actual_temp = 255;
1253         else
1254                 actual_temp = temp & 0x1ff;
1255
1256         actual_temp = (actual_temp * 1000);
1257
1258         return actual_temp;
1259 }
1260
1261 #define TAHITI_IO_MC_REGS_SIZE 36
1262
/* Tahiti MC tuning: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written by si_mc_load_microcode() before streaming the MC ucode. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1301
/* Pitcairn MC tuning: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written by si_mc_load_microcode(); identical to the Tahiti table except
 * for the final (0x9f) entry. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1340
/* Verde MC tuning: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written by si_mc_load_microcode(); identical to the Tahiti table except
 * for the final (0x9f) entry. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1379
/* Oland MC tuning: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written by si_mc_load_microcode(); identical to the Tahiti table except
 * for the final (0x9f) entry. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1418
/* Hainan MC tuning: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written by si_mc_load_microcode(); identical to the Tahiti table except
 * for the final (0x9f) entry. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1457
1458 /* ucode loading */
1459 static int si_mc_load_microcode(struct radeon_device *rdev)
1460 {
1461         const __be32 *fw_data;
1462         u32 running, blackout = 0;
1463         u32 *io_mc_regs;
1464         int i, ucode_size, regs_size;
1465
1466         if (!rdev->mc_fw)
1467                 return -EINVAL;
1468
1469         switch (rdev->family) {
1470         case CHIP_TAHITI:
1471                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1472                 ucode_size = SI_MC_UCODE_SIZE;
1473                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1474                 break;
1475         case CHIP_PITCAIRN:
1476                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1477                 ucode_size = SI_MC_UCODE_SIZE;
1478                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1479                 break;
1480         case CHIP_VERDE:
1481         default:
1482                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1483                 ucode_size = SI_MC_UCODE_SIZE;
1484                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1485                 break;
1486         case CHIP_OLAND:
1487                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1488                 ucode_size = OLAND_MC_UCODE_SIZE;
1489                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1490                 break;
1491         case CHIP_HAINAN:
1492                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1493                 ucode_size = OLAND_MC_UCODE_SIZE;
1494                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1495                 break;
1496         }
1497
1498         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1499
1500         if (running == 0) {
1501                 if (running) {
1502                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1503                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1504                 }
1505
1506                 /* reset the engine and set to writable */
1507                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1508                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1509
1510                 /* load mc io regs */
1511                 for (i = 0; i < regs_size; i++) {
1512                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1513                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1514                 }
1515                 /* load the MC ucode */
1516                 fw_data = (const __be32 *)rdev->mc_fw->data;
1517                 for (i = 0; i < ucode_size; i++)
1518                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1519
1520                 /* put the engine back into the active state */
1521                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1522                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1523                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1524
1525                 /* wait for training to complete */
1526                 for (i = 0; i < rdev->usec_timeout; i++) {
1527                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1528                                 break;
1529                         udelay(1);
1530                 }
1531                 for (i = 0; i < rdev->usec_timeout; i++) {
1532                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1533                                 break;
1534                         udelay(1);
1535                 }
1536
1537                 if (running)
1538                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1539         }
1540
1541         return 0;
1542 }
1543
1544 static int si_init_microcode(struct radeon_device *rdev)
1545 {
1546         const char *chip_name;
1547         const char *rlc_chip_name;
1548         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1549         size_t smc_req_size;
1550         char fw_name[30];
1551         int err;
1552
1553         DRM_DEBUG("\n");
1554
1555         switch (rdev->family) {
1556         case CHIP_TAHITI:
1557                 chip_name = "TAHITI";
1558                 rlc_chip_name = "TAHITI";
1559                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1560                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1561                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1562                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1563                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1564                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1565                 break;
1566         case CHIP_PITCAIRN:
1567                 chip_name = "PITCAIRN";
1568                 rlc_chip_name = "PITCAIRN";
1569                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1570                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1571                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1572                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1573                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1574                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1575                 break;
1576         case CHIP_VERDE:
1577                 chip_name = "VERDE";
1578                 rlc_chip_name = "VERDE";
1579                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1580                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1581                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1582                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1583                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1584                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1585                 break;
1586         case CHIP_OLAND:
1587                 chip_name = "OLAND";
1588                 rlc_chip_name = "OLAND";
1589                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1590                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1591                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1592                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1593                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1594                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1595                 break;
1596         case CHIP_HAINAN:
1597                 chip_name = "HAINAN";
1598                 rlc_chip_name = "HAINAN";
1599                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1600                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1601                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1602                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1603                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1604                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1605                 break;
1606         default: BUG();
1607         }
1608
1609         DRM_INFO("Loading %s Microcode\n", chip_name);
1610
1611         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1612         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1613         if (err)
1614                 goto out;
1615         if (rdev->pfp_fw->datasize != pfp_req_size) {
1616                 printk(KERN_ERR
1617                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1618                        rdev->pfp_fw->datasize, fw_name);
1619                 err = -EINVAL;
1620                 goto out;
1621         }
1622
1623         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1624         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1625         if (err)
1626                 goto out;
1627         if (rdev->me_fw->datasize != me_req_size) {
1628                 printk(KERN_ERR
1629                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1630                        rdev->me_fw->datasize, fw_name);
1631                 err = -EINVAL;
1632         }
1633
1634         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1635         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1636         if (err)
1637                 goto out;
1638         if (rdev->ce_fw->datasize != ce_req_size) {
1639                 printk(KERN_ERR
1640                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1641                        rdev->ce_fw->datasize, fw_name);
1642                 err = -EINVAL;
1643         }
1644
1645         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", rlc_chip_name);
1646         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1647         if (err)
1648                 goto out;
1649         if (rdev->rlc_fw->datasize != rlc_req_size) {
1650                 printk(KERN_ERR
1651                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1652                        rdev->rlc_fw->datasize, fw_name);
1653                 err = -EINVAL;
1654         }
1655
1656         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1657         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1658         if (err)
1659                 goto out;
1660         if (rdev->mc_fw->datasize != mc_req_size) {
1661                 printk(KERN_ERR
1662                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1663                        rdev->mc_fw->datasize, fw_name);
1664                 err = -EINVAL;
1665         }
1666
1667         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1668         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1669         if (err) {
1670                 printk(KERN_ERR
1671                        "smc: error loading firmware \"%s\"\n",
1672                        fw_name);
1673                 release_firmware(rdev->smc_fw);
1674                 rdev->smc_fw = NULL;
1675                 err = 0;
1676         } else if (rdev->smc_fw->datasize != smc_req_size) {
1677                 printk(KERN_ERR
1678                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1679                        rdev->smc_fw->datasize, fw_name);
1680                 err = -EINVAL;
1681         }
1682
1683 out:
1684         if (err) {
1685                 if (err != -EINVAL)
1686                         printk(KERN_ERR
1687                                "si_cp: Failed to load firmware \"%s\"\n",
1688                                fw_name);
1689                 release_firmware(rdev->pfp_fw);
1690                 rdev->pfp_fw = NULL;
1691                 release_firmware(rdev->me_fw);
1692                 rdev->me_fw = NULL;
1693                 release_firmware(rdev->ce_fw);
1694                 rdev->ce_fw = NULL;
1695                 release_firmware(rdev->rlc_fw);
1696                 rdev->rlc_fw = NULL;
1697                 release_firmware(rdev->mc_fw);
1698                 rdev->mc_fw = NULL;
1699                 release_firmware(rdev->smc_fw);
1700                 rdev->smc_fw = NULL;
1701         }
1702         return err;
1703 }
1704
1705 /**
1706  * si_fini_microcode - drop the firmwares image references
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Drop the pfp, me, rlc, mc and ce firmware image references.
1711  * Called at driver shutdown.
1712  */
1713 static void si_fini_microcode(struct radeon_device *rdev)
1714 {
1715         release_firmware(rdev->pfp_fw);
1716         rdev->pfp_fw = NULL;
1717         release_firmware(rdev->me_fw);
1718         rdev->me_fw = NULL;
1719         release_firmware(rdev->rlc_fw);
1720         rdev->rlc_fw = NULL;
1721         release_firmware(rdev->mc_fw);
1722         rdev->mc_fw = NULL;
1723         release_firmware(rdev->smc_fw);
1724         rdev->smc_fw = NULL;
1725         release_firmware(rdev->ce_fw);
1726         rdev->ce_fw = NULL;
1727 }
1728
1729 /* watermark setup */
1730 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1731                                    struct radeon_crtc *radeon_crtc,
1732                                    struct drm_display_mode *mode,
1733                                    struct drm_display_mode *other_mode)
1734 {
1735         u32 tmp, buffer_alloc, i;
1736         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1737         /*
1738          * Line Buffer Setup
1739          * There are 3 line buffers, each one shared by 2 display controllers.
1740          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1741          * the display controllers.  The paritioning is done via one of four
1742          * preset allocations specified in bits 21:20:
1743          *  0 - half lb
1744          *  2 - whole lb, other crtc must be disabled
1745          */
1746         /* this can get tricky if we have two large displays on a paired group
1747          * of crtcs.  Ideally for multiple large displays we'd assign them to
1748          * non-linked crtcs for maximum line buffer allocation.
1749          */
1750         if (radeon_crtc->base.enabled && mode) {
1751                 if (other_mode) {
1752                         tmp = 0; /* 1/2 */
1753                         buffer_alloc = 1;
1754                 } else {
1755                         tmp = 2; /* whole */
1756                         buffer_alloc = 2;
1757                 }
1758         } else {
1759                 tmp = 0;
1760                 buffer_alloc = 0;
1761         }
1762
1763         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1764                DC_LB_MEMORY_CONFIG(tmp));
1765
1766         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1767                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1768         for (i = 0; i < rdev->usec_timeout; i++) {
1769                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1770                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1771                         break;
1772                 udelay(1);
1773         }
1774
1775         if (radeon_crtc->base.enabled && mode) {
1776                 switch (tmp) {
1777                 case 0:
1778                 default:
1779                         return 4096 * 2;
1780                 case 2:
1781                         return 8192 * 2;
1782                 }
1783         }
1784
1785         /* controller not enabled, so no lb used */
1786         return 0;
1787 }
1788
1789 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1790 {
1791         u32 tmp = RREG32(MC_SHARED_CHMAP);
1792
1793         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1794         case 0:
1795         default:
1796                 return 1;
1797         case 1:
1798                 return 2;
1799         case 2:
1800                 return 4;
1801         case 3:
1802                 return 8;
1803         case 4:
1804                 return 3;
1805         case 5:
1806                 return 6;
1807         case 6:
1808                 return 10;
1809         case 7:
1810                 return 12;
1811         case 8:
1812                 return 16;
1813         }
1814 }
1815
/* Per-head inputs to the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1831
1832 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1833 {
1834         /* Calculate raw DRAM Bandwidth */
1835         fixed20_12 dram_efficiency; /* 0.7 */
1836         fixed20_12 yclk, dram_channels, bandwidth;
1837         fixed20_12 a;
1838
1839         a.full = dfixed_const(1000);
1840         yclk.full = dfixed_const(wm->yclk);
1841         yclk.full = dfixed_div(yclk, a);
1842         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1843         a.full = dfixed_const(10);
1844         dram_efficiency.full = dfixed_const(7);
1845         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1846         bandwidth.full = dfixed_mul(dram_channels, yclk);
1847         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1848
1849         return dfixed_trunc(bandwidth);
1850 }
1851
1852 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1853 {
1854         /* Calculate DRAM Bandwidth and the part allocated to display. */
1855         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1856         fixed20_12 yclk, dram_channels, bandwidth;
1857         fixed20_12 a;
1858
1859         a.full = dfixed_const(1000);
1860         yclk.full = dfixed_const(wm->yclk);
1861         yclk.full = dfixed_div(yclk, a);
1862         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1863         a.full = dfixed_const(10);
1864         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1865         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1866         bandwidth.full = dfixed_mul(dram_channels, yclk);
1867         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1868
1869         return dfixed_trunc(bandwidth);
1870 }
1871
1872 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1873 {
1874         /* Calculate the display Data return Bandwidth */
1875         fixed20_12 return_efficiency; /* 0.8 */
1876         fixed20_12 sclk, bandwidth;
1877         fixed20_12 a;
1878
1879         a.full = dfixed_const(1000);
1880         sclk.full = dfixed_const(wm->sclk);
1881         sclk.full = dfixed_div(sclk, a);
1882         a.full = dfixed_const(10);
1883         return_efficiency.full = dfixed_const(8);
1884         return_efficiency.full = dfixed_div(return_efficiency, a);
1885         a.full = dfixed_const(32);
1886         bandwidth.full = dfixed_mul(a, sclk);
1887         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1888
1889         return dfixed_trunc(bandwidth);
1890 }
1891
1892 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1893 {
1894         return 32;
1895 }
1896
1897 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1898 {
1899         /* Calculate the DMIF Request Bandwidth */
1900         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1901         fixed20_12 disp_clk, sclk, bandwidth;
1902         fixed20_12 a, b1, b2;
1903         u32 min_bandwidth;
1904
1905         a.full = dfixed_const(1000);
1906         disp_clk.full = dfixed_const(wm->disp_clk);
1907         disp_clk.full = dfixed_div(disp_clk, a);
1908         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1909         b1.full = dfixed_mul(a, disp_clk);
1910
1911         a.full = dfixed_const(1000);
1912         sclk.full = dfixed_const(wm->sclk);
1913         sclk.full = dfixed_div(sclk, a);
1914         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1915         b2.full = dfixed_mul(a, sclk);
1916
1917         a.full = dfixed_const(10);
1918         disp_clk_request_efficiency.full = dfixed_const(8);
1919         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1920
1921         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1922
1923         a.full = dfixed_const(min_bandwidth);
1924         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1925
1926         return dfixed_trunc(bandwidth);
1927 }
1928
1929 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1930 {
1931         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1932         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1933         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1934         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1935
1936         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1937 }
1938
1939 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1940 {
1941         /* Calculate the display mode Average Bandwidth
1942          * DisplayMode should contain the source and destination dimensions,
1943          * timing, etc.
1944          */
1945         fixed20_12 bpp;
1946         fixed20_12 line_time;
1947         fixed20_12 src_width;
1948         fixed20_12 bandwidth;
1949         fixed20_12 a;
1950
1951         a.full = dfixed_const(1000);
1952         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1953         line_time.full = dfixed_div(line_time, a);
1954         bpp.full = dfixed_const(wm->bytes_per_pixel);
1955         src_width.full = dfixed_const(wm->src_width);
1956         bandwidth.full = dfixed_mul(src_width, bpp);
1957         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1958         bandwidth.full = dfixed_div(bandwidth, line_time);
1959
1960         return dfixed_trunc(bandwidth);
1961 }
1962
/* Compute the latency watermark for one head, in ns: the worst-case
 * latency of a request plus any extra time the line buffer needs to
 * refill a line.  Uses dce6_available_bandwidth() for the bandwidth
 * budget shared among all heads.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* presumably 512-byte chunks and 128-byte cursor line pairs —
	 * TODO confirm against the hw docs */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* every other head may return a chunk (and a cursor line pair)
	 * ahead of us */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps or interlaced scaling needs up to
	 * 4 source lines per destination line, otherwise 2 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / time for (mc_latency + 512) disp_clk cycles */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = bytes_per_pixel per disp_clk tick */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* rate at which the line buffer can be filled */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill max_src_lines_per_dst_line worth of pixels */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if refilling a line takes longer than the active period, the
	 * excess is added to the latency */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2025
2026 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2027 {
2028         if (dce6_average_bandwidth(wm) <=
2029             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2030                 return true;
2031         else
2032                 return false;
2033 };
2034
2035 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2036 {
2037         if (dce6_average_bandwidth(wm) <=
2038             (dce6_available_bandwidth(wm) / wm->num_heads))
2039                 return true;
2040         else
2041                 return false;
2042 };
2043
2044 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2045 {
2046         u32 lb_partitions = wm->lb_size / wm->src_width;
2047         u32 line_time = wm->active_time + wm->blank_time;
2048         u32 latency_tolerant_lines;
2049         u32 latency_hiding;
2050         fixed20_12 a;
2051
2052         a.full = dfixed_const(1);
2053         if (wm->vsc.full > a.full)
2054                 latency_tolerant_lines = 1;
2055         else {
2056                 if (lb_partitions <= (wm->vtaps + 1))
2057                         latency_tolerant_lines = 1;
2058                 else
2059                         latency_tolerant_lines = 2;
2060         }
2061
2062         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2063
2064         if (dce6_latency_watermark(wm) <= latency_hiding)
2065                 return true;
2066         else
2067                 return false;
2068 }
2069
/* Compute and program the latency watermarks and priority marks for one
 * crtc.  Watermark A is computed for high clocks, watermark B for low
 * clocks; the resulting values are also cached on the crtc for DPM.
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer bytes granted to this crtc
 * @num_heads: number of active display heads
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line time capped to the 16-bit field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a scaled by pixel clock, hsc
		 * and a divide by 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2229
/* Recompute and program the display line-buffer split and watermarks
 * for all crtcs.
 *
 * @rdev: radeon_device pointer
 *
 * Crtcs are handled in linked pairs (i, i+1) because each line buffer
 * is shared by two display controllers (see dce6_line_buffer_adjust);
 * the i+1 access assumes rdev->num_crtc is even -- TODO confirm.
 */
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	/* count the active display heads */
	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	/* split each pair's line buffer, then program its watermarks */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
2252
2253 /*
2254  * Core functions
2255  */
2256 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2257 {
2258         const u32 num_tile_mode_states = 32;
2259         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2260
2261         switch (rdev->config.si.mem_row_size_in_kb) {
2262         case 1:
2263                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2264                 break;
2265         case 2:
2266         default:
2267                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2268                 break;
2269         case 4:
2270                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2271                 break;
2272         }
2273
2274         if ((rdev->family == CHIP_TAHITI) ||
2275             (rdev->family == CHIP_PITCAIRN)) {
2276                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2277                         switch (reg_offset) {
2278                         case 0:  /* non-AA compressed depth or any compressed stencil */
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2284                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287                                 break;
2288                         case 1:  /* 2xAA/4xAA compressed depth only */
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2294                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297                                 break;
2298                         case 2:  /* 8xAA compressed depth only */
2299                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2304                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307                                 break;
2308                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2309                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2314                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2316                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317                                 break;
2318                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2319                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2320                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2323                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2324                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2326                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2327                                 break;
2328                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332                                                  TILE_SPLIT(split_equal_to_row_size) |
2333                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2334                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337                                 break;
2338                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342                                                  TILE_SPLIT(split_equal_to_row_size) |
2343                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2344                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2347                                 break;
2348                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352                                                  TILE_SPLIT(split_equal_to_row_size) |
2353                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2354                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2356                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357                                 break;
2358                         case 8:  /* 1D and 1D Array Surfaces */
2359                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2360                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2363                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2364                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367                                 break;
2368                         case 9:  /* Displayable maps. */
2369                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2373                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2374                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377                                 break;
2378                         case 10:  /* Display 8bpp. */
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2383                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2384                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2387                                 break;
2388                         case 11:  /* Display 16bpp. */
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2393                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2394                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397                                 break;
2398                         case 12:  /* Display 32bpp. */
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2403                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2404                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2407                                 break;
2408                         case 13:  /* Thin. */
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2413                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2414                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2417                                 break;
2418                         case 14:  /* Thin 8 bpp. */
2419                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2423                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2424                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2426                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2427                                 break;
2428                         case 15:  /* Thin 16 bpp. */
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2433                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2434                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437                                 break;
2438                         case 16:  /* Thin 32 bpp. */
2439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2443                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2444                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2447                                 break;
2448                         case 17:  /* Thin 64 bpp. */
2449                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452                                                  TILE_SPLIT(split_equal_to_row_size) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2454                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2457                                 break;
2458                         case 21:  /* 8 bpp PRT. */
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2464                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2465                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467                                 break;
2468                         case 22:  /* 16 bpp PRT */
2469                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2473                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2474                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2477                                 break;
2478                         case 23:  /* 32 bpp PRT */
2479                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2483                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2484                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2487                                 break;
2488                         case 24:  /* 64 bpp PRT */
2489                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2491                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2493                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2494                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2497                                 break;
2498                         case 25:  /* 128 bpp PRT */
2499                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2501                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2502                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2503                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2504                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2507                                 break;
2508                         default:
2509                                 gb_tile_moden = 0;
2510                                 break;
2511                         }
2512                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2513                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2514                 }
2515         } else if ((rdev->family == CHIP_VERDE) ||
2516                    (rdev->family == CHIP_OLAND) ||
2517                    (rdev->family == CHIP_HAINAN)) {
2518                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2519                         switch (reg_offset) {
2520                         case 0:  /* non-AA compressed depth or any compressed stencil */
2521                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2525                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2526                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2529                                 break;
2530                         case 1:  /* 2xAA/4xAA compressed depth only */
2531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2536                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539                                 break;
2540                         case 2:  /* 8xAA compressed depth only */
2541                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2545                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2546                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2548                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2549                                 break;
2550                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2551                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2555                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2556                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2558                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2559                                 break;
2560                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2565                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2566                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569                                 break;
2570                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2571                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574                                                  TILE_SPLIT(split_equal_to_row_size) |
2575                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2576                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2578                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2579                                 break;
2580                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2581                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2583                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584                                                  TILE_SPLIT(split_equal_to_row_size) |
2585                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2586                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589                                 break;
2590                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2591                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2593                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594                                                  TILE_SPLIT(split_equal_to_row_size) |
2595                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2596                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2598                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2599                                 break;
2600                         case 8:  /* 1D and 1D Array Surfaces */
2601                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2602                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2605                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2606                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2609                                 break;
2610                         case 9:  /* Displayable maps. */
2611                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2616                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619                                 break;
2620                         case 10:  /* Display 8bpp. */
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2625                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2626                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2629                                 break;
2630                         case 11:  /* Display 16bpp. */
2631                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2636                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639                                 break;
2640                         case 12:  /* Display 32bpp. */
2641                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2643                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2645                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2646                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649                                 break;
2650                         case 13:  /* Thin. */
2651                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2652                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2655                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2656                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659                                 break;
2660                         case 14:  /* Thin 8 bpp. */
2661                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2665                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2666                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2668                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669                                 break;
2670                         case 15:  /* Thin 16 bpp. */
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2675                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2676                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2678                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679                                 break;
2680                         case 16:  /* Thin 32 bpp. */
2681                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2684                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2685                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2686                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689                                 break;
2690                         case 17:  /* Thin 64 bpp. */
2691                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694                                                  TILE_SPLIT(split_equal_to_row_size) |
2695                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2696                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2699                                 break;
2700                         case 21:  /* 8 bpp PRT. */
2701                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2706                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2707                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709                                 break;
2710                         case 22:  /* 16 bpp PRT */
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2715                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2716                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2719                                 break;
2720                         case 23:  /* 32 bpp PRT */
2721                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2725                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2726                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2729                                 break;
2730                         case 24:  /* 64 bpp PRT */
2731                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2733                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2734                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2736                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2738                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2739                                 break;
2740                         case 25:  /* 128 bpp PRT */
2741                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2743                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2744                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2745                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2746                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2749                                 break;
2750                         default:
2751                                 gb_tile_moden = 0;
2752                                 break;
2753                         }
2754                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2755                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756                 }
2757         } else
2758                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2759 }
2760
2761 static void si_select_se_sh(struct radeon_device *rdev,
2762                             u32 se_num, u32 sh_num)
2763 {
2764         u32 data = INSTANCE_BROADCAST_WRITES;
2765
2766         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2767                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2768         else if (se_num == 0xffffffff)
2769                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2770         else if (sh_num == 0xffffffff)
2771                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2772         else
2773                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2774         WREG32(GRBM_GFX_INDEX, data);
2775 }
2776
2777 static u32 si_create_bitmask(u32 bit_width)
2778 {
2779         u32 i, mask = 0;
2780
2781         for (i = 0; i < bit_width; i++) {
2782                 mask <<= 1;
2783                 mask |= 1;
2784         }
2785         return mask;
2786 }
2787
2788 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2789 {
2790         u32 data, mask;
2791
2792         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2793         if (data & 1)
2794                 data &= INACTIVE_CUS_MASK;
2795         else
2796                 data = 0;
2797         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2798
2799         data >>= INACTIVE_CUS_SHIFT;
2800
2801         mask = si_create_bitmask(cu_per_sh);
2802
2803         return ~data & mask;
2804 }
2805
/* For every SE/SH pair, clear the first active CU found from
 * SPI_STATIC_THREAD_MGMT_3 (reserving that CU from static thread
 * management), then restore broadcast addressing.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* target this specific SE/SH for the reads/writes below */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				/* NOTE(review): the cumulative `mask <<= k` only ever
				 * tests bits 0,1,3,6,10,15 (triangular numbers), not
				 * every CU bit; `mask = 1 << k` would scan all 16.
				 * This matches the upstream Linux driver, so it is
				 * left as-is — confirm intent before changing. */
				mask <<= k;
				if (active_cu & mask) {
					/* disable the first active CU hit and stop */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* restore broadcast writes to all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2832
2833 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2834                               u32 max_rb_num, u32 se_num,
2835                               u32 sh_per_se)
2836 {
2837         u32 data, mask;
2838
2839         data = RREG32(CC_RB_BACKEND_DISABLE);
2840         if (data & 1)
2841                 data &= BACKEND_DISABLE_MASK;
2842         else
2843                 data = 0;
2844         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2845
2846         data >>= BACKEND_DISABLE_SHIFT;
2847
2848         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2849
2850         return data & mask;
2851 }
2852
/* Work out which render backends (RBs) are enabled across all shader
 * engines and program PA_SC_RASTER_CONFIG on each SE accordingly.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather each SE/SH's disabled-RB bits into one packed bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per SH slot */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: every RB not marked disabled is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program the raster config per SE, consuming two enabled_rbs
	 * bits (one RB pair) per SH */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both RBs enabled (also the fallback) */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* restore broadcast writes to all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2900
/* One-time GPU initialization: fill in per-ASIC configuration limits,
 * derive and program the global tiling/address configuration, then set
 * HW defaults for the 3D engine.  Called during ASIC startup.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-ASIC shader/pipe/backend limits and FIFO sizes.
	 * Note the default: label sits on the CHIP_VERDE case (mid-switch),
	 * so unknown families are treated as Verde. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of column bits */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no bit changes: value preserved as-is */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the hw settle */
	udelay(50);
}
3157
3158 /*
3159  * GPU scratch registers helpers function.
3160  */
3161 static void si_scratch_init(struct radeon_device *rdev)
3162 {
3163         int i;
3164
3165         rdev->scratch.num_reg = 7;
3166         rdev->scratch.reg_base = SCRATCH_REG0;
3167         for (i = 0; i < rdev->scratch.num_reg; i++) {
3168                 rdev->scratch.free[i] = true;
3169                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3170         }
3171 }
3172
/* Emit a fence on the given ring: flush the GPU read caches over the
 * GART, then write the fence sequence number to the fence driver's GPU
 * address and raise an interrupt via EVENT_WRITE_EOP.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: whole range */
	radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3199
3200 /*
3201  * IB stuff
3202  */
/* Schedule an indirect buffer (IB) for execution on the given ring.
 * Const IBs get a SWITCH_BUFFER preamble and use INDIRECT_BUFFER_CONST;
 * normal IBs first record the post-IB read pointer (to the rptr save
 * register or the writeback buffer) and flush the read caches for the
 * IB's VM id afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 (this packet) + 4 (IB packet) + 8 (flush) dwords ahead */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 (this packet) + 4 (IB packet) + 8 (flush) dwords ahead */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) | /* byte-swap the IB on big-endian hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* dword count plus the VM id in bits 31:24 */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: whole range */
		radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3259
3260 /*
3261  * CP.
3262  */
3263 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3264 {
3265         if (enable)
3266                 WREG32(CP_ME_CNTL, 0);
3267         else {
3268                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3269                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3270                 WREG32(SCRATCH_UMSK, 0);
3271                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3272                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3273                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3274         }
3275         udelay(50);
3276 }
3277
3278 static int si_cp_load_microcode(struct radeon_device *rdev)
3279 {
3280         const __be32 *fw_data;
3281         int i;
3282
3283         if (!rdev->me_fw || !rdev->pfp_fw)
3284                 return -EINVAL;
3285
3286         si_cp_enable(rdev, false);
3287
3288         /* PFP */
3289         fw_data = (const __be32 *)rdev->pfp_fw->data;
3290         WREG32(CP_PFP_UCODE_ADDR, 0);
3291         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3292                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3293         WREG32(CP_PFP_UCODE_ADDR, 0);
3294
3295         /* CE */
3296         fw_data = (const __be32 *)rdev->ce_fw->data;
3297         WREG32(CP_CE_UCODE_ADDR, 0);
3298         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3299                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3300         WREG32(CP_CE_UCODE_ADDR, 0);
3301
3302         /* ME */
3303         fw_data = (const __be32 *)rdev->me_fw->data;
3304         WREG32(CP_ME_RAM_WADDR, 0);
3305         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3306                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3307         WREG32(CP_ME_RAM_WADDR, 0);
3308
3309         WREG32(CP_PFP_UCODE_ADDR, 0);
3310         WREG32(CP_CE_UCODE_ADDR, 0);
3311         WREG32(CP_ME_RAM_WADDR, 0);
3312         WREG32(CP_ME_RAM_RADDR, 0);
3313         return 0;
3314 }
3315
3316 static int si_cp_start(struct radeon_device *rdev)
3317 {
3318         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3319         int r, i;
3320
3321         r = radeon_ring_lock(rdev, ring, 7 + 4);
3322         if (r) {
3323                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3324                 return r;
3325         }
3326         /* init the CP */
3327         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3328         radeon_ring_write(ring, 0x1);
3329         radeon_ring_write(ring, 0x0);
3330         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3331         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3332         radeon_ring_write(ring, 0);
3333         radeon_ring_write(ring, 0);
3334
3335         /* init the CE partitions */
3336         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3337         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3338         radeon_ring_write(ring, 0xc000);
3339         radeon_ring_write(ring, 0xe000);
3340         radeon_ring_unlock_commit(rdev, ring);
3341
3342         si_cp_enable(rdev, true);
3343
3344         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3345         if (r) {
3346                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3347                 return r;
3348         }
3349
3350         /* setup clear context state */
3351         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3352         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3353
3354         for (i = 0; i < si_default_size; i++)
3355                 radeon_ring_write(ring, si_default_state[i]);
3356
3357         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3358         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3359
3360         /* set clear context state */
3361         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3362         radeon_ring_write(ring, 0);
3363
3364         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3365         radeon_ring_write(ring, 0x00000316);
3366         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3367         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3368
3369         radeon_ring_unlock_commit(rdev, ring);
3370
3371         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3372                 ring = &rdev->ring[i];
3373                 r = radeon_ring_lock(rdev, ring, 2);
3374
3375                 /* clear the compute context state */
3376                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3377                 radeon_ring_write(ring, 0);
3378
3379                 radeon_ring_unlock_commit(rdev, ring);
3380         }
3381
3382         return 0;
3383 }
3384
3385 static void si_cp_fini(struct radeon_device *rdev)
3386 {
3387         struct radeon_ring *ring;
3388         si_cp_enable(rdev, false);
3389
3390         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3391         radeon_ring_fini(rdev, ring);
3392         radeon_scratch_free(rdev, ring->rptr_save_reg);
3393
3394         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3395         radeon_ring_fini(rdev, ring);
3396         radeon_scratch_free(rdev, ring->rptr_save_reg);
3397
3398         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3399         radeon_ring_fini(rdev, ring);
3400         radeon_scratch_free(rdev, ring->rptr_save_reg);
3401 }
3402
/*
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs ring 0 (gfx + compute) and rings 1/2 (compute only): buffer
 * size, read/write pointers, writeback addresses and ring base, then
 * starts the CP via si_cp_start() and ring-tests each ring.  A failing
 * test on the gfx ring is fatal; a failing compute ring only marks
 * that single ring not ready.
 *
 * Returns 0 on success, negative error code if the gfx ring test fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* keep GUI idle interrupts off while the CP is reprogrammed */
	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* writeback disabled: stop rptr memory updates */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: mark every ring unusable */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring test failures are non-fatal */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
3531
3532 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3533 {
3534         u32 reset_mask = 0;
3535         u32 tmp;
3536
3537         /* GRBM_STATUS */
3538         tmp = RREG32(GRBM_STATUS);
3539         if (tmp & (PA_BUSY | SC_BUSY |
3540                    BCI_BUSY | SX_BUSY |
3541                    TA_BUSY | VGT_BUSY |
3542                    DB_BUSY | CB_BUSY |
3543                    GDS_BUSY | SPI_BUSY |
3544                    IA_BUSY | IA_BUSY_NO_DMA))
3545                 reset_mask |= RADEON_RESET_GFX;
3546
3547         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3548                    CP_BUSY | CP_COHERENCY_BUSY))
3549                 reset_mask |= RADEON_RESET_CP;
3550
3551         if (tmp & GRBM_EE_BUSY)
3552                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3553
3554         /* GRBM_STATUS2 */
3555         tmp = RREG32(GRBM_STATUS2);
3556         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3557                 reset_mask |= RADEON_RESET_RLC;
3558
3559         /* DMA_STATUS_REG 0 */
3560         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3561         if (!(tmp & DMA_IDLE))
3562                 reset_mask |= RADEON_RESET_DMA;
3563
3564         /* DMA_STATUS_REG 1 */
3565         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3566         if (!(tmp & DMA_IDLE))
3567                 reset_mask |= RADEON_RESET_DMA1;
3568
3569         /* SRBM_STATUS2 */
3570         tmp = RREG32(SRBM_STATUS2);
3571         if (tmp & DMA_BUSY)
3572                 reset_mask |= RADEON_RESET_DMA;
3573
3574         if (tmp & DMA1_BUSY)
3575                 reset_mask |= RADEON_RESET_DMA1;
3576
3577         /* SRBM_STATUS */
3578         tmp = RREG32(SRBM_STATUS);
3579
3580         if (tmp & IH_BUSY)
3581                 reset_mask |= RADEON_RESET_IH;
3582
3583         if (tmp & SEM_BUSY)
3584                 reset_mask |= RADEON_RESET_SEM;
3585
3586         if (tmp & GRBM_RQ_PENDING)
3587                 reset_mask |= RADEON_RESET_GRBM;
3588
3589         if (tmp & VMC_BUSY)
3590                 reset_mask |= RADEON_RESET_VMC;
3591
3592         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3593                    MCC_BUSY | MCD_BUSY))
3594                 reset_mask |= RADEON_RESET_MC;
3595
3596         if (evergreen_is_display_hung(rdev))
3597                 reset_mask |= RADEON_RESET_DISPLAY;
3598
3599         /* VM_L2_STATUS */
3600         tmp = RREG32(VM_L2_STATUS);
3601         if (tmp & L2_BUSY)
3602                 reset_mask |= RADEON_RESET_VMC;
3603
3604         /* Skip MC reset as it's mostly likely not hung, just busy */
3605         if (reset_mask & RADEON_RESET_MC) {
3606                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3607                 reset_mask &= ~RADEON_RESET_MC;
3608         }
3609
3610         return reset_mask;
3611 }
3612
/*
 * si_gpu_soft_reset - soft reset the GPU blocks selected by reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags naming the blocks to reset
 *
 * Dumps fault state, disables PG/CG and the RLC, halts the CP and any
 * selected DMA engine, stops the memory controller, then pulses the
 * matching bits in GRBM_SOFT_RESET / SRBM_SOFT_RESET before restoring
 * the MC.  No-op when reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* give in-flight work a moment to drain before stopping the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the request mask into GRBM / SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* assert, let it settle, then deassert; the read-backs
		 * presumably post the writes to the bus — keep the order */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/settle/deassert sequence for the SRBM bits */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3744
3745 int si_asic_reset(struct radeon_device *rdev)
3746 {
3747         u32 reset_mask;
3748
3749         reset_mask = si_gpu_check_soft_reset(rdev);
3750
3751         if (reset_mask)
3752                 r600_set_bios_scratch_engine_hung(rdev, true);
3753
3754         si_gpu_soft_reset(rdev, reset_mask);
3755
3756         reset_mask = si_gpu_check_soft_reset(rdev);
3757
3758         if (!reset_mask)
3759                 r600_set_bios_scratch_engine_hung(rdev, false);
3760
3761         return 0;
3762 }
3763
3764 /**
3765  * si_gfx_is_lockup - Check if the GFX engine is locked up
3766  *
3767  * @rdev: radeon_device pointer
3768  * @ring: radeon_ring structure holding ring information
3769  *
3770  * Check if the GFX engine is locked up.
3771  * Returns true if the engine appears to be locked up, false if not.
3772  */
3773 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3774 {
3775         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3776
3777         if (!(reset_mask & (RADEON_RESET_GFX |
3778                             RADEON_RESET_COMPUTE |
3779                             RADEON_RESET_CP))) {
3780                 radeon_ring_lockup_update(ring);
3781                 return false;
3782         }
3783         /* force CP activities */
3784         radeon_ring_force_activity(rdev, ring);
3785         return radeon_ring_test_lockup(rdev, ring);
3786 }
3787
3788 /* MC */
/*
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC (and display access to
 * it), programs the system aperture and FB location from rdev->mc,
 * then restores the MC and disables the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* the MC must be idle while its apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top (vram_end>>24) in the high 16 bits, base
	 * (vram_start>>24) in the low 16 bits */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* NOTE(review): BOT == TOP presumably leaves the AGP aperture
	 * empty (AGP unused on SI) — confirm against register docs */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3839
3840 void si_vram_gtt_location(struct radeon_device *rdev,
3841                           struct radeon_mc *mc)
3842 {
3843         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3844                 /* leave room for at least 1024M GTT */
3845                 dev_warn(rdev->dev, "limiting VRAM\n");
3846                 mc->real_vram_size = 0xFFC0000000ULL;
3847                 mc->mc_vram_size = 0xFFC0000000ULL;
3848         }
3849         radeon_vram_location(rdev, &rdev->mc, 0);
3850         rdev->mc.gtt_base_align = 0;
3851         radeon_gtt_location(rdev, mc);
3852 }
3853
3854 static int si_mc_init(struct radeon_device *rdev)
3855 {
3856         u32 tmp;
3857         int chansize, numchan;
3858
3859         /* Get VRAM informations */
3860         rdev->mc.vram_is_ddr = true;
3861         tmp = RREG32(MC_ARB_RAMCFG);
3862         if (tmp & CHANSIZE_OVERRIDE) {
3863                 chansize = 16;
3864         } else if (tmp & CHANSIZE_MASK) {
3865                 chansize = 64;
3866         } else {
3867                 chansize = 32;
3868         }
3869         tmp = RREG32(MC_SHARED_CHMAP);
3870         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3871         case 0:
3872         default:
3873                 numchan = 1;
3874                 break;
3875         case 1:
3876                 numchan = 2;
3877                 break;
3878         case 2:
3879                 numchan = 4;
3880                 break;
3881         case 3:
3882                 numchan = 8;
3883                 break;
3884         case 4:
3885                 numchan = 3;
3886                 break;
3887         case 5:
3888                 numchan = 6;
3889                 break;
3890         case 6:
3891                 numchan = 10;
3892                 break;
3893         case 7:
3894                 numchan = 12;
3895                 break;
3896         case 8:
3897                 numchan = 16;
3898                 break;
3899         }
3900         rdev->mc.vram_width = numchan * chansize;
3901         /* Could aper size report 0 ? */
3902         rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
3903         rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
3904         /* size in MB on si */
3905         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3906         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3907         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3908         si_vram_gtt_location(rdev, &rdev->mc);
3909         radeon_update_bandwidth_info(rdev);
3910
3911         return 0;
3912 }
3913
3914 /*
3915  * GART
3916  */
/*
 * si_pcie_gart_tlb_flush - flush the HDP cache and the VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then requests invalidation of VM context 0
 * via VM_INVALIDATE_REQUEST.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3925
/*
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and VM L2 cache,
 * configures VM context 0 as the system GART mapping and contexts
 * 1-15 for per-process VMs (faults redirected to the dummy page),
 * then flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT range, faults hit the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented register offsets — purpose unknown
	 * from this file; kept as-is */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4010
/*
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Turns off all VM contexts, drops the L1 TLB and L2 cache enable
 * bits, then unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4029
/*
 * si_pcie_gart_fini - final GART teardown
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware before freeing the page table
 * (presumably so the hardware can no longer reference it), then
 * releases the GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4036
4037 /* vm parser */
4038 static bool si_vm_reg_valid(u32 reg)
4039 {
4040         /* context regs are fine */
4041         if (reg >= 0x28000)
4042                 return true;
4043
4044         /* check config regs */
4045         switch (reg) {
4046         case GRBM_GFX_INDEX:
4047         case CP_STRMOUT_CNTL:
4048         case VGT_VTX_VECT_EJECT_REG:
4049         case VGT_CACHE_INVALIDATION:
4050         case VGT_ESGS_RING_SIZE:
4051         case VGT_GSVS_RING_SIZE:
4052         case VGT_GS_VERTEX_REUSE:
4053         case VGT_PRIMITIVE_TYPE:
4054         case VGT_INDEX_TYPE:
4055         case VGT_NUM_INDICES:
4056         case VGT_NUM_INSTANCES:
4057         case VGT_TF_RING_SIZE:
4058         case VGT_HS_OFFCHIP_PARAM:
4059         case VGT_TF_MEMORY_BASE:
4060         case PA_CL_ENHANCE:
4061         case PA_SU_LINE_STIPPLE_VALUE:
4062         case PA_SC_LINE_STIPPLE_STATE:
4063         case PA_SC_ENHANCE:
4064         case SQC_CACHES:
4065         case SPI_STATIC_THREAD_MGMT_1:
4066         case SPI_STATIC_THREAD_MGMT_2:
4067         case SPI_STATIC_THREAD_MGMT_3:
4068         case SPI_PS_MAX_WAVE_ID:
4069         case SPI_CONFIG_CNTL:
4070         case SPI_CONFIG_CNTL_1:
4071         case TA_CNTL_AUX:
4072                 return true;
4073         default:
4074                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4075                 return false;
4076         }
4077 }
4078
4079 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4080                                   u32 *ib, struct radeon_cs_packet *pkt)
4081 {
4082         switch (pkt->opcode) {
4083         case PACKET3_NOP:
4084         case PACKET3_SET_BASE:
4085         case PACKET3_SET_CE_DE_COUNTERS:
4086         case PACKET3_LOAD_CONST_RAM:
4087         case PACKET3_WRITE_CONST_RAM:
4088         case PACKET3_WRITE_CONST_RAM_OFFSET:
4089         case PACKET3_DUMP_CONST_RAM:
4090         case PACKET3_INCREMENT_CE_COUNTER:
4091         case PACKET3_WAIT_ON_DE_COUNTER:
4092         case PACKET3_CE_WRITE:
4093                 break;
4094         default:
4095                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4096                 return -EINVAL;
4097         }
4098         return 0;
4099 }
4100
4101 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4102 {
4103         u32 start_reg, reg, i;
4104         u32 command = ib[idx + 4];
4105         u32 info = ib[idx + 1];
4106         u32 idx_value = ib[idx];
4107         if (command & PACKET3_CP_DMA_CMD_SAS) {
4108                 /* src address space is register */
4109                 if (((info & 0x60000000) >> 29) == 0) {
4110                         start_reg = idx_value << 2;
4111                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4112                                 reg = start_reg;
4113                                 if (!si_vm_reg_valid(reg)) {
4114                                         DRM_ERROR("CP DMA Bad SRC register\n");
4115                                         return -EINVAL;
4116                                 }
4117                         } else {
4118                                 for (i = 0; i < (command & 0x1fffff); i++) {
4119                                         reg = start_reg + (4 * i);
4120                                         if (!si_vm_reg_valid(reg)) {
4121                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4122                                                 return -EINVAL;
4123                                         }
4124                                 }
4125                         }
4126                 }
4127         }
4128         if (command & PACKET3_CP_DMA_CMD_DAS) {
4129                 /* dst address space is register */
4130                 if (((info & 0x00300000) >> 20) == 0) {
4131                         start_reg = ib[idx + 2];
4132                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4133                                 reg = start_reg;
4134                                 if (!si_vm_reg_valid(reg)) {
4135                                         DRM_ERROR("CP DMA Bad DST register\n");
4136                                         return -EINVAL;
4137                                 }
4138                         } else {
4139                                 for (i = 0; i < (command & 0x1fffff); i++) {
4140                                         reg = start_reg + (4 * i);
4141                                 if (!si_vm_reg_valid(reg)) {
4142                                                 DRM_ERROR("CP DMA Bad DST register\n");
4143                                                 return -EINVAL;
4144                                         }
4145                                 }
4146                         }
4147                 }
4148         }
4149         return 0;
4150 }
4151
/* Validate a PACKET3 from a gfx-ring VM IB (SI).
 *
 * Opcodes that cannot touch registers pass through unchanged; opcodes
 * that can write registers have each target register checked with
 * si_vm_reg_valid().  Returns 0 if the packet is allowed, -EINVAL
 * otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
{
        int r;
        u32 idx = pkt->idx + 1; /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;

        switch (pkt->opcode) {
        /* these opcodes need no register validation */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_INDEX_BUFFER_SIZE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_DRAW_INDIRECT:
        case PACKET3_DRAW_INDEX_INDIRECT:
        case PACKET3_INDEX_BASE:
        case PACKET3_DRAW_INDEX_2:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_INDEX_TYPE:
        case PACKET3_DRAW_INDIRECT_MULTI:
        case PACKET3_DRAW_INDEX_AUTO:
        case PACKET3_DRAW_INDEX_IMMD:
        case PACKET3_NUM_INSTANCES:
        case PACKET3_DRAW_INDEX_MULTI_AUTO:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_DRAW_INDEX_OFFSET_2:
        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
        case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
        case PACKET3_MPEG_INDEX:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* validate the destination register when the 0xf00 sel
                 * field selects a register destination (== 0) */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* single-register (one_reg_wr style) write */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* sequential write: check the whole range */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set means the write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set means the destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_SET_CONFIG_REG:
                /* range-check against the config register window, then
                 * validate each register individually */
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
                        return -EINVAL;
                }
                for (i = 0; i < pkt->count; i++) {
                        reg = start_reg + (4 * i);
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                r = si_vm_packet3_cp_dma_check(ib, idx);
                if (r)
                        return r;
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4269
/* Validate a PACKET3 from a compute-ring VM IB (SI).
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the smaller set of
 * opcodes the compute rings may use.  Register-writing opcodes have
 * their targets checked with si_vm_reg_valid().  Returns 0 if the
 * packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
{
        int r;
        u32 idx = pkt->idx + 1; /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;

        switch (pkt->opcode) {
        /* these opcodes need no register validation */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* validate the destination register when the 0xf00 sel
                 * field selects a register destination (== 0) */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                if ((idx_value & 0xf00) == 0) {
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* single-register write */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* sequential write: check the whole range */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set means the write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set means the destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                r = si_vm_packet3_cp_dma_check(ib, idx);
                if (r)
                        return r;
                break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4357
4358 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4359 {
4360         int ret = 0;
4361         u32 idx = 0;
4362         struct radeon_cs_packet pkt;
4363
4364         do {
4365                 pkt.idx = idx;
4366                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4367                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4368                 pkt.one_reg_wr = 0;
4369                 switch (pkt.type) {
4370                 case RADEON_PACKET_TYPE0:
4371                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4372                         ret = -EINVAL;
4373                         break;
4374                 case RADEON_PACKET_TYPE2:
4375                         idx += 1;
4376                         break;
4377                 case RADEON_PACKET_TYPE3:
4378                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4379                         if (ib->is_const_ib)
4380                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4381                         else {
4382                                 switch (ib->ring) {
4383                                 case RADEON_RING_TYPE_GFX_INDEX:
4384                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4385                                         break;
4386                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4387                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4388                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4389                                         break;
4390                                 default:
4391                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4392                                         ret = -EINVAL;
4393                                         break;
4394                                 }
4395                         }
4396                         idx += pkt.count + 2;
4397                         break;
4398                 default:
4399                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4400                         ret = -EINVAL;
4401                         break;
4402                 }
4403                 if (ret)
4404                         break;
4405         } while (idx < ib->length_dw);
4406
4407         return ret;
4408 }
4409
4410 /*
4411  * vm
4412  */
4413 int si_vm_init(struct radeon_device *rdev)
4414 {
4415         /* number of VMs */
4416         rdev->vm_manager.nvm = 16;
4417         /* base offset of vram pages */
4418         rdev->vm_manager.vram_base_offset = 0;
4419
4420         return 0;
4421 }
4422
/* Tear down the VM manager; nothing to release on SI. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4426
4427 /**
4428  * si_vm_decode_fault - print human readable fault info
4429  *
4430  * @rdev: radeon_device pointer
4431  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4432  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4433  *
4434  * Print human readable fault information (SI).
4435  */
4436 static void si_vm_decode_fault(struct radeon_device *rdev,
4437                                u32 status, u32 addr)
4438 {
4439         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4440         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4441         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4442         char *block;
4443
4444         if (rdev->family == CHIP_TAHITI) {
4445                 switch (mc_id) {
4446                 case 160:
4447                 case 144:
4448                 case 96:
4449                 case 80:
4450                 case 224:
4451                 case 208:
4452                 case 32:
4453                 case 16:
4454                         block = "CB";
4455                         break;
4456                 case 161:
4457                 case 145:
4458                 case 97:
4459                 case 81:
4460                 case 225:
4461                 case 209:
4462                 case 33:
4463                 case 17:
4464                         block = "CB_FMASK";
4465                         break;
4466                 case 162:
4467                 case 146:
4468                 case 98:
4469                 case 82:
4470                 case 226:
4471                 case 210:
4472                 case 34:
4473                 case 18:
4474                         block = "CB_CMASK";
4475                         break;
4476                 case 163:
4477                 case 147:
4478                 case 99:
4479                 case 83:
4480                 case 227:
4481                 case 211:
4482                 case 35:
4483                 case 19:
4484                         block = "CB_IMMED";
4485                         break;
4486                 case 164:
4487                 case 148:
4488                 case 100:
4489                 case 84:
4490                 case 228:
4491                 case 212:
4492                 case 36:
4493                 case 20:
4494                         block = "DB";
4495                         break;
4496                 case 165:
4497                 case 149:
4498                 case 101:
4499                 case 85:
4500                 case 229:
4501                 case 213:
4502                 case 37:
4503                 case 21:
4504                         block = "DB_HTILE";
4505                         break;
4506                 case 167:
4507                 case 151:
4508                 case 103:
4509                 case 87:
4510                 case 231:
4511                 case 215:
4512                 case 39:
4513                 case 23:
4514                         block = "DB_STEN";
4515                         break;
4516                 case 72:
4517                 case 68:
4518                 case 64:
4519                 case 8:
4520                 case 4:
4521                 case 0:
4522                 case 136:
4523                 case 132:
4524                 case 128:
4525                 case 200:
4526                 case 196:
4527                 case 192:
4528                         block = "TC";
4529                         break;
4530                 case 112:
4531                 case 48:
4532                         block = "CP";
4533                         break;
4534                 case 49:
4535                 case 177:
4536                 case 50:
4537                 case 178:
4538                         block = "SH";
4539                         break;
4540                 case 53:
4541                 case 190:
4542                         block = "VGT";
4543                         break;
4544                 case 117:
4545                         block = "IH";
4546                         break;
4547                 case 51:
4548                 case 115:
4549                         block = "RLC";
4550                         break;
4551                 case 119:
4552                 case 183:
4553                         block = "DMA0";
4554                         break;
4555                 case 61:
4556                         block = "DMA1";
4557                         break;
4558                 case 248:
4559                 case 120:
4560                         block = "HDP";
4561                         break;
4562                 default:
4563                         block = "unknown";
4564                         break;
4565                 }
4566         } else {
4567                 switch (mc_id) {
4568                 case 32:
4569                 case 16:
4570                 case 96:
4571                 case 80:
4572                 case 160:
4573                 case 144:
4574                 case 224:
4575                 case 208:
4576                         block = "CB";
4577                         break;
4578                 case 33:
4579                 case 17:
4580                 case 97:
4581                 case 81:
4582                 case 161:
4583                 case 145:
4584                 case 225:
4585                 case 209:
4586                         block = "CB_FMASK";
4587                         break;
4588                 case 34:
4589                 case 18:
4590                 case 98:
4591                 case 82:
4592                 case 162:
4593                 case 146:
4594                 case 226:
4595                 case 210:
4596                         block = "CB_CMASK";
4597                         break;
4598                 case 35:
4599                 case 19:
4600                 case 99:
4601                 case 83:
4602                 case 163:
4603                 case 147:
4604                 case 227:
4605                 case 211:
4606                         block = "CB_IMMED";
4607                         break;
4608                 case 36:
4609                 case 20:
4610                 case 100:
4611                 case 84:
4612                 case 164:
4613                 case 148:
4614                 case 228:
4615                 case 212:
4616                         block = "DB";
4617                         break;
4618                 case 37:
4619                 case 21:
4620                 case 101:
4621                 case 85:
4622                 case 165:
4623                 case 149:
4624                 case 229:
4625                 case 213:
4626                         block = "DB_HTILE";
4627                         break;
4628                 case 39:
4629                 case 23:
4630                 case 103:
4631                 case 87:
4632                 case 167:
4633                 case 151:
4634                 case 231:
4635                 case 215:
4636                         block = "DB_STEN";
4637                         break;
4638                 case 72:
4639                 case 68:
4640                 case 8:
4641                 case 4:
4642                 case 136:
4643                 case 132:
4644                 case 200:
4645                 case 196:
4646                         block = "TC";
4647                         break;
4648                 case 112:
4649                 case 48:
4650                         block = "CP";
4651                         break;
4652                 case 49:
4653                 case 177:
4654                 case 50:
4655                 case 178:
4656                         block = "SH";
4657                         break;
4658                 case 53:
4659                         block = "VGT";
4660                         break;
4661                 case 117:
4662                         block = "IH";
4663                         break;
4664                 case 51:
4665                 case 115:
4666                         block = "RLC";
4667                         break;
4668                 case 119:
4669                 case 183:
4670                         block = "DMA0";
4671                         break;
4672                 case 61:
4673                         block = "DMA1";
4674                         break;
4675                 case 248:
4676                 case 120:
4677                         block = "HDP";
4678                         break;
4679                 default:
4680                         block = "unknown";
4681                         break;
4682                 }
4683         }
4684
4685         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4686                protections, vmid, addr,
4687                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4688                block, mc_id);
4689 }
4690
4691 /**
4692  * si_vm_set_page - update the page tables using the CP
4693  *
4694  * @rdev: radeon_device pointer
4695  * @ib: indirect buffer to fill with commands
4696  * @pe: addr of the page entry
4697  * @addr: dst addr to write into pe
4698  * @count: number of page entries to update
4699  * @incr: increase next addr by incr bytes
4700  * @flags: access flags
4701  *
4702  * Update the page tables using the CP (SI).
4703  */
4704 void si_vm_set_page(struct radeon_device *rdev,
4705                     struct radeon_ib *ib,
4706                     uint64_t pe,
4707                     uint64_t addr, unsigned count,
4708                     uint32_t incr, uint32_t flags)
4709 {
4710         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4711         uint64_t value;
4712         unsigned ndw;
4713
4714         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4715                 while (count) {
4716                         ndw = 2 + count * 2;
4717                         if (ndw > 0x3FFE)
4718                                 ndw = 0x3FFE;
4719
4720                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4721                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4722                                         WRITE_DATA_DST_SEL(1));
4723                         ib->ptr[ib->length_dw++] = pe;
4724                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4725                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4726                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4727                                         value = radeon_vm_map_gart(rdev, addr);
4728                                         value &= 0xFFFFFFFFFFFFF000ULL;
4729                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4730                                         value = addr;
4731                                 } else {
4732                                         value = 0;
4733                                 }
4734                                 addr += incr;
4735                                 value |= r600_flags;
4736                                 ib->ptr[ib->length_dw++] = value;
4737                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4738                         }
4739                 }
4740         } else {
4741                 /* DMA */
4742                 si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4743         }
4744 }
4745
/* Emit the command stream that makes a VM's page tables visible on a
 * ring: program the context's page-table base, flush the HDP cache,
 * invalidate the TLB for this VM id, and sync PFP to ME.  A NULL @vm
 * is a no-op. */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* write new base address */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));

        /* contexts 0-7 and 8-15 live in two separate register banks */
        if (vm->id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0x1);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
4788
4789 /*
4790  *  Power and clock gating
4791  */
4792 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4793 {
4794         int i;
4795
4796         for (i = 0; i < rdev->usec_timeout; i++) {
4797                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4798                         break;
4799                 udelay(1);
4800         }
4801
4802         for (i = 0; i < rdev->usec_timeout; i++) {
4803                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4804                         break;
4805                 udelay(1);
4806         }
4807 }
4808
4809 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4810                                          bool enable)
4811 {
4812         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4813         u32 mask;
4814         int i;
4815
4816         if (enable)
4817                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4818         else
4819                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4820         WREG32(CP_INT_CNTL_RING0, tmp);
4821
4822         if (!enable) {
4823                 /* read a gfx register */
4824                 tmp = RREG32(DB_DEPTH_INFO);
4825
4826                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4827                 for (i = 0; i < rdev->usec_timeout; i++) {
4828                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4829                                 break;
4830                         udelay(1);
4831                 }
4832         }
4833 }
4834
4835 static void si_set_uvd_dcm(struct radeon_device *rdev,
4836                            bool sw_mode)
4837 {
4838         u32 tmp, tmp2;
4839
4840         tmp = RREG32(UVD_CGC_CTRL);
4841         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4842         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4843
4844         if (sw_mode) {
4845                 tmp &= ~0x7ffff800;
4846                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4847         } else {
4848                 tmp |= 0x7ffff800;
4849                 tmp2 = 0;
4850         }
4851
4852         WREG32(UVD_CGC_CTRL, tmp);
4853         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4854 }
4855
/* Initialize UVD internal clock gating.  hw_mode is hard-coded to true,
 * so this always selects hardware-controlled DCM via si_set_uvd_dcm();
 * the else branch is currently unreachable — presumably kept as a
 * manual debug toggle. */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
        bool hw_mode = true;

        if (hw_mode) {
                si_set_uvd_dcm(rdev, false);
        } else {
                u32 tmp = RREG32(UVD_CGC_CTRL);
                tmp &= ~DCM;
                WREG32(UVD_CGC_CTRL, tmp);
        }
}
4868
4869 static u32 si_halt_rlc(struct radeon_device *rdev)
4870 {
4871         u32 data, orig;
4872
4873         orig = data = RREG32(RLC_CNTL);
4874
4875         if (data & RLC_ENABLE) {
4876                 data &= ~RLC_ENABLE;
4877                 WREG32(RLC_CNTL, data);
4878
4879                 si_wait_for_rlc_serdes(rdev);
4880         }
4881
4882         return orig;
4883 }
4884
4885 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4886 {
4887         u32 tmp;
4888
4889         tmp = RREG32(RLC_CNTL);
4890         if (tmp != rlc)
4891                 WREG32(RLC_CNTL, rlc);
4892 }
4893
4894 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4895 {
4896         u32 data, orig;
4897
4898         orig = data = RREG32(DMA_PG);
4899         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4900                 data |= PG_CNTL_ENABLE;
4901         else
4902                 data &= ~PG_CNTL_ENABLE;
4903         if (orig != data)
4904                 WREG32(DMA_PG, data);
4905 }
4906
4907 static void si_init_dma_pg(struct radeon_device *rdev)
4908 {
4909         u32 tmp;
4910
4911         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4912         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4913
4914         for (tmp = 0; tmp < 5; tmp++)
4915                 WREG32(DMA_PGFSM_WRITE, 0);
4916 }
4917
/* Enable or disable gfx coarse-grain power gating (SI).  Enabling
 * programs the RLC_TTOP_D thresholds, sets GFX_PG_ENABLE and turns on
 * automatic power gating; disabling clears AUTO_PG_EN. */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
                               bool enable)
{
        u32 tmp;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
                WREG32(RLC_TTOP_D, tmp);

                tmp = RREG32(RLC_PG_CNTL);
                tmp |= GFX_PG_ENABLE;
                WREG32(RLC_PG_CNTL, tmp);

                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp |= AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);
        } else {
                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp &= ~AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);

                /* NOTE(review): result is discarded — presumably this read
                 * exists only to wake/flush the gfx block after disabling
                 * power gating; confirm against the hw docs */
                tmp = RREG32(DB_RENDER_CONTROL);
        }
}
4942
/* One-time init for gfx coarse-grain power gating: point the RLC at
 * the save/restore and clear-state buffers and program the auto-PG
 * idle threshold. */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 tmp;

        /* save/restore buffer address, in units of 256 bytes */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

        tmp = RREG32(RLC_PG_CNTL);
        tmp |= GFX_PG_SRC;
        WREG32(RLC_PG_CNTL, tmp);

        /* clear-state buffer address, same 256-byte units */
        WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

        tmp = RREG32(RLC_AUTO_PG_CTRL);

        tmp &= ~GRBM_REG_SGIT_MASK;
        tmp |= GRBM_REG_SGIT(0x700);
        tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
        WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4962
4963 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4964 {
4965         u32 mask = 0, tmp, tmp1;
4966         int i;
4967
4968         si_select_se_sh(rdev, se, sh);
4969         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4970         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4971         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4972
4973         tmp &= 0xffff0000;
4974
4975         tmp |= tmp1;
4976         tmp >>= 16;
4977
4978         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4979                 mask <<= 1;
4980                 mask |= 1;
4981         }
4982
4983         return (~tmp) & mask;
4984 }
4985
4986 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4987 {
4988         u32 i, j, k, active_cu_number = 0;
4989         u32 mask, counter, cu_bitmap;
4990         u32 tmp = 0;
4991
4992         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4993                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4994                         mask = 1;
4995                         cu_bitmap = 0;
4996                         counter  = 0;
4997                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4998                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4999                                         if (counter < 2)
5000                                                 cu_bitmap |= mask;
5001                                         counter++;
5002                                 }
5003                                 mask <<= 1;
5004                         }
5005
5006                         active_cu_number += counter;
5007                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5008                 }
5009         }
5010
5011         WREG32(RLC_PG_AO_CU_MASK, tmp);
5012
5013         tmp = RREG32(RLC_MAX_PG_CU);
5014         tmp &= ~MAX_PU_CU_MASK;
5015         tmp |= MAX_PU_CU(active_cu_number);
5016         WREG32(RLC_MAX_PG_CU, tmp);
5017 }
5018
/* si_enable_cgcg - toggle gfx coarse-grain clock gating (CGCG/CGLS).
 *
 * Enabling pushes the new gating settings out to all RLC serdes units
 * while the RLC is halted; the statement order below is significant.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		/* broadcast the write to every serdes instance */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the RLC to its pre-halt state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads of CB_CGTT_SCLK_CTRL — presumably to let
		 * the CB clock-gating state settle before disabling;
		 * NOTE(review): no rationale documented upstream
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if something actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5058
/* si_enable_mgcg - toggle gfx medium-grain clock gating, plus CP memory
 * light sleep when the ASIC supports it.
 *
 * Both directions halt the RLC, broadcast the new settings to all RLC
 * serdes units, then restore the RLC via si_update_rlc().
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optional CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low 6 MGCG override bits so gating can engage */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast new settings to every serdes instance */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the override bits on to defeat gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* and make sure CP memory light sleep is off as well */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5114
/* si_enable_uvd_mgcg - toggle UVD medium-grain clock gating.
 *
 * Flips the UVD memory clock-gating bits, the DCM bit in UVD_CGC_CTRL,
 * and the SMC-side CGTT local override registers together.
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* set the low 14 memory clock-gating enable bits */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* clear all CGTT local overrides: gating fully enabled */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		/* clear the memory clock-gating enable bits */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* force all CGTT local overrides: gating fully disabled */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5146
/* Memory-controller client registers that share the same clock-gating /
 * light-sleep enable bit layout; iterated by si_enable_mc_ls() and
 * si_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5159
5160 static void si_enable_mc_ls(struct radeon_device *rdev,
5161                             bool enable)
5162 {
5163         int i;
5164         u32 orig, data;
5165
5166         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5167                 orig = data = RREG32(mc_cg_registers[i]);
5168                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5169                         data |= MC_LS_ENABLE;
5170                 else
5171                         data &= ~MC_LS_ENABLE;
5172                 if (data != orig)
5173                         WREG32(mc_cg_registers[i], data);
5174         }
5175 }
5176
5177 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5178                                bool enable)
5179 {
5180         int i;
5181         u32 orig, data;
5182
5183         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5184                 orig = data = RREG32(mc_cg_registers[i]);
5185                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5186                         data |= MC_CG_ENABLE;
5187                 else
5188                         data &= ~MC_CG_ENABLE;
5189                 if (data != orig)
5190                         WREG32(mc_cg_registers[i], data);
5191         }
5192 }
5193
5194 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5195                                bool enable)
5196 {
5197         u32 orig, data, offset;
5198         int i;
5199
5200         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5201                 for (i = 0; i < 2; i++) {
5202                         if (i == 0)
5203                                 offset = DMA0_REGISTER_OFFSET;
5204                         else
5205                                 offset = DMA1_REGISTER_OFFSET;
5206                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5207                         data &= ~MEM_POWER_OVERRIDE;
5208                         if (data != orig)
5209                                 WREG32(DMA_POWER_CNTL + offset, data);
5210                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5211                 }
5212         } else {
5213                 for (i = 0; i < 2; i++) {
5214                         if (i == 0)
5215                                 offset = DMA0_REGISTER_OFFSET;
5216                         else
5217                                 offset = DMA1_REGISTER_OFFSET;
5218                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5219                         data |= MEM_POWER_OVERRIDE;
5220                         if (data != orig)
5221                                 WREG32(DMA_POWER_CNTL + offset, data);
5222
5223                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5224                         data = 0xff000000;
5225                         if (data != orig)
5226                                 WREG32(DMA_CLK_CTRL + offset, data);
5227                 }
5228         }
5229 }
5230
5231 static void si_enable_bif_mgls(struct radeon_device *rdev,
5232                                bool enable)
5233 {
5234         u32 orig, data;
5235
5236         orig = data = RREG32_PCIE(PCIE_CNTL2);
5237
5238         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5239                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5240                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5241         else
5242                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5243                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5244
5245         if (orig != data)
5246                 WREG32_PCIE(PCIE_CNTL2, data);
5247 }
5248
5249 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5250                                bool enable)
5251 {
5252         u32 orig, data;
5253
5254         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5255
5256         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5257                 data &= ~CLOCK_GATING_DIS;
5258         else
5259                 data |= CLOCK_GATING_DIS;
5260
5261         if (orig != data)
5262                 WREG32(HDP_HOST_PATH_CNTL, data);
5263 }
5264
5265 static void si_enable_hdp_ls(struct radeon_device *rdev,
5266                              bool enable)
5267 {
5268         u32 orig, data;
5269
5270         orig = data = RREG32(HDP_MEM_POWER_LS);
5271
5272         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5273                 data |= HDP_LS_ENABLE;
5274         else
5275                 data &= ~HDP_LS_ENABLE;
5276
5277         if (orig != data)
5278                 WREG32(HDP_MEM_POWER_LS, data);
5279 }
5280
/* si_update_cg - enable/disable clock gating for the selected IP blocks.
 * @rdev: radeon device
 * @block: bitmask of RADEON_CG_BLOCK_* values to update
 * @enable: true to enable gating, false to disable
 */
void si_update_cg(struct radeon_device *rdev,
		  u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters!  MGCG before CGCG on enable, and the
		 * reverse on disable
		 */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* not all SI parts have a UVD block */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5321
5322 static void si_init_cg(struct radeon_device *rdev)
5323 {
5324         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5325                             RADEON_CG_BLOCK_MC |
5326                             RADEON_CG_BLOCK_SDMA |
5327                             RADEON_CG_BLOCK_BIF |
5328                             RADEON_CG_BLOCK_HDP), true);
5329         if (rdev->has_uvd) {
5330                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5331                 si_init_uvd_internal_cg(rdev);
5332         }
5333 }
5334
5335 static void si_fini_cg(struct radeon_device *rdev)
5336 {
5337         if (rdev->has_uvd) {
5338                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5339         }
5340         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5341                             RADEON_CG_BLOCK_MC |
5342                             RADEON_CG_BLOCK_SDMA |
5343                             RADEON_CG_BLOCK_BIF |
5344                             RADEON_CG_BLOCK_HDP), false);
5345 }
5346
5347 u32 si_get_csb_size(struct radeon_device *rdev)
5348 {
5349         u32 count = 0;
5350         const struct cs_section_def *sect = NULL;
5351         const struct cs_extent_def *ext = NULL;
5352
5353         if (rdev->rlc.cs_data == NULL)
5354                 return 0;
5355
5356         /* begin clear state */
5357         count += 2;
5358         /* context control state */
5359         count += 3;
5360
5361         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5362                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5363                         if (sect->id == SECT_CONTEXT)
5364                                 count += 2 + ext->reg_count;
5365                         else
5366                                 return 0;
5367                 }
5368         }
5369         /* pa_sc_raster_config */
5370         count += 3;
5371         /* end clear state */
5372         count += 2;
5373         /* clear state */
5374         count += 2;
5375
5376         return count;
5377 }
5378
/* si_get_csb_buffer - fill @buffer with the clear-state indirect buffer.
 * @rdev: radeon device
 * @buffer: destination; must be at least si_get_csb_size() dwords
 *
 * Emits the PM4 packet stream that loads the golden context register
 * state: preamble begin, context control, all SECT_CONTEXT extents from
 * rdev->rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG value, preamble
 * end, and a final CLEAR_STATE packet.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	/* one SET_CONTEXT_REG packet per extent; abort on any section
	 * that is not SECT_CONTEXT (buffer then stays incomplete, same
	 * condition under which si_get_csb_size() returns 0)
	 */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				/* context regs are addressed relative to 0xa000 */
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				return;
			}
		}
	}

	/* per-ASIC raster configuration */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = 0x2a00126a;
		break;
	case CHIP_VERDE:
		buffer[count++] = 0x0000124a;
		break;
	case CHIP_OLAND:
		buffer[count++] = 0x00000082;
		break;
	case CHIP_HAINAN:
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		break;
	}

	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5437
/* si_init_pg - initialize power gating.
 *
 * With any PG feature enabled this sets up DMA PG, the always-on CU
 * mask, and (when supported) gfx coarse-grain PG, then turns the gates
 * on.  With PG disabled the RLC save/restore and clear-state buffer
 * addresses still have to be programmed, since the RLC expects them
 * regardless of power gating.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5455
5456 static void si_fini_pg(struct radeon_device *rdev)
5457 {
5458         if (rdev->pg_flags) {
5459                 si_enable_dma_pg(rdev, false);
5460                 si_enable_gfx_cgpg(rdev, false);
5461         }
5462 }
5463
5464 /*
5465  * RLC
5466  */
/* si_rlc_reset - pulse the RLC soft-reset bit in GRBM_SOFT_RESET. */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	/* assert reset, hold briefly, deassert, then let it settle */
	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5478
/* si_rlc_stop - disable the RLC and wait for its serdes to go idle. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5487
/* si_rlc_start - enable the RLC and give it time to come up. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5496
5497 static bool si_lbpw_supported(struct radeon_device *rdev)
5498 {
5499         u32 tmp;
5500
5501         /* Enable LBPW only for DDR3 */
5502         tmp = RREG32(MC_SEQ_MISC0);
5503         if ((tmp & 0xF0000000) == 0xB0000000)
5504                 return true;
5505         return false;
5506 }
5507
5508 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5509 {
5510         u32 tmp;
5511
5512         tmp = RREG32(RLC_LB_CNTL);
5513         if (enable)
5514                 tmp |= LOAD_BALANCE_ENABLE;
5515         else
5516                 tmp &= ~LOAD_BALANCE_ENABLE;
5517         WREG32(RLC_LB_CNTL, tmp);
5518
5519         if (!enable) {
5520                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5521                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5522         }
5523 }
5524
/* si_rlc_resume - stop/reset the RLC, reinitialize PG/CG state, load
 * the RLC microcode, and restart it.
 *
 * Returns 0 on success or -EINVAL if no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear list/load-balance state before loading ucode */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* ucode image is stored big-endian; write it one dword at a time
	 * through the indexed ADDR/DATA pair, then reset the index
	 */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5564
5565 static void si_enable_interrupts(struct radeon_device *rdev)
5566 {
5567         u32 ih_cntl = RREG32(IH_CNTL);
5568         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5569
5570         ih_cntl |= ENABLE_INTR;
5571         ih_rb_cntl |= IH_RB_ENABLE;
5572         WREG32(IH_CNTL, ih_cntl);
5573         WREG32(IH_RB_CNTL, ih_rb_cntl);
5574         rdev->ih.enabled = true;
5575 }
5576
5577 static void si_disable_interrupts(struct radeon_device *rdev)
5578 {
5579         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5580         u32 ih_cntl = RREG32(IH_CNTL);
5581
5582         ih_rb_cntl &= ~IH_RB_ENABLE;
5583         ih_cntl &= ~ENABLE_INTR;
5584         WREG32(IH_RB_CNTL, ih_rb_cntl);
5585         WREG32(IH_CNTL, ih_cntl);
5586         /* set rptr, wptr to 0 */
5587         WREG32(IH_RB_RPTR, 0);
5588         WREG32(IH_RB_WPTR, 0);
5589         rdev->ih.enabled = false;
5590         rdev->ih.rptr = 0;
5591 }
5592
/* si_disable_interrupt_state - mask every interrupt source at its origin.
 *
 * Clears the CP ring, DMA trap, GRBM, per-crtc vblank/pageflip, and
 * (on ASICs with a display engine) HPD interrupt enables.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty bits, drop all other enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* disable the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank interrupt masks, only for crtcs that exist on this ASIC */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip (GRPH) interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* HPD: preserve the polarity bit, clear the interrupt enables */
	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5650
/* si_irq_init - one-time interrupt handler (IH) setup.
 *
 * Allocates the IH ring, loads the RLC, programs the ring base/size and
 * writeback address, sets IH_CNTL defaults, masks all sources, then
 * enables the IH.  Returns 0 on success or a negative error code.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5721
5722 int si_irq_set(struct radeon_device *rdev)
5723 {
5724         u32 cp_int_cntl;
5725         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5726         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5727         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5728         u32 grbm_int_cntl = 0;
5729         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5730         u32 dma_cntl, dma_cntl1;
5731         u32 thermal_int = 0;
5732
5733         if (!rdev->irq.installed) {
5734                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5735                 return -EINVAL;
5736         }
5737         /* don't enable anything if the ih is disabled */
5738         if (!rdev->ih.enabled) {
5739                 si_disable_interrupts(rdev);
5740                 /* force the active interrupt state to all disabled */
5741                 si_disable_interrupt_state(rdev);
5742                 return 0;
5743         }
5744
5745         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5746                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5747
5748         if (!ASIC_IS_NODCE(rdev)) {
5749                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5750                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5751                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5752                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5753                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5754                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5755         }
5756
5757         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5758         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5759
5760         thermal_int = RREG32(CG_THERMAL_INT) &
5761                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5762
5763         /* enable CP interrupts on all rings */
5764         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5765                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5766                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5767         }
5768         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5769                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5770                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5771         }
5772         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5773                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5774                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5775         }
5776         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5777                 DRM_DEBUG("si_irq_set: sw int dma\n");
5778                 dma_cntl |= TRAP_ENABLE;
5779         }
5780
5781         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5782                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5783                 dma_cntl1 |= TRAP_ENABLE;
5784         }
5785         if (rdev->irq.crtc_vblank_int[0] ||
5786             atomic_read(&rdev->irq.pflip[0])) {
5787                 DRM_DEBUG("si_irq_set: vblank 0\n");
5788                 crtc1 |= VBLANK_INT_MASK;
5789         }
5790         if (rdev->irq.crtc_vblank_int[1] ||
5791             atomic_read(&rdev->irq.pflip[1])) {
5792                 DRM_DEBUG("si_irq_set: vblank 1\n");
5793                 crtc2 |= VBLANK_INT_MASK;
5794         }
5795         if (rdev->irq.crtc_vblank_int[2] ||
5796             atomic_read(&rdev->irq.pflip[2])) {
5797                 DRM_DEBUG("si_irq_set: vblank 2\n");
5798                 crtc3 |= VBLANK_INT_MASK;
5799         }
5800         if (rdev->irq.crtc_vblank_int[3] ||
5801             atomic_read(&rdev->irq.pflip[3])) {
5802                 DRM_DEBUG("si_irq_set: vblank 3\n");
5803                 crtc4 |= VBLANK_INT_MASK;
5804         }
5805         if (rdev->irq.crtc_vblank_int[4] ||
5806             atomic_read(&rdev->irq.pflip[4])) {
5807                 DRM_DEBUG("si_irq_set: vblank 4\n");
5808                 crtc5 |= VBLANK_INT_MASK;
5809         }
5810         if (rdev->irq.crtc_vblank_int[5] ||
5811             atomic_read(&rdev->irq.pflip[5])) {
5812                 DRM_DEBUG("si_irq_set: vblank 5\n");
5813                 crtc6 |= VBLANK_INT_MASK;
5814         }
5815         if (rdev->irq.hpd[0]) {
5816                 DRM_DEBUG("si_irq_set: hpd 1\n");
5817                 hpd1 |= DC_HPDx_INT_EN;
5818         }
5819         if (rdev->irq.hpd[1]) {
5820                 DRM_DEBUG("si_irq_set: hpd 2\n");
5821                 hpd2 |= DC_HPDx_INT_EN;
5822         }
5823         if (rdev->irq.hpd[2]) {
5824                 DRM_DEBUG("si_irq_set: hpd 3\n");
5825                 hpd3 |= DC_HPDx_INT_EN;
5826         }
5827         if (rdev->irq.hpd[3]) {
5828                 DRM_DEBUG("si_irq_set: hpd 4\n");
5829                 hpd4 |= DC_HPDx_INT_EN;
5830         }
5831         if (rdev->irq.hpd[4]) {
5832                 DRM_DEBUG("si_irq_set: hpd 5\n");
5833                 hpd5 |= DC_HPDx_INT_EN;
5834         }
5835         if (rdev->irq.hpd[5]) {
5836                 DRM_DEBUG("si_irq_set: hpd 6\n");
5837                 hpd6 |= DC_HPDx_INT_EN;
5838         }
5839
5840         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5841         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5842         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5843
5844         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5845         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5846
5847         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5848
5849         if (rdev->irq.dpm_thermal) {
5850                 DRM_DEBUG("dpm thermal\n");
5851                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5852         }
5853
5854         if (rdev->num_crtc >= 2) {
5855                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5856                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5857         }
5858         if (rdev->num_crtc >= 4) {
5859                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5860                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5861         }
5862         if (rdev->num_crtc >= 6) {
5863                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5864                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5865         }
5866
5867         if (rdev->num_crtc >= 2) {
5868                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5869                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5870         }
5871         if (rdev->num_crtc >= 4) {
5872                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5873                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5874         }
5875         if (rdev->num_crtc >= 6) {
5876                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5877                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5878         }
5879
5880         if (!ASIC_IS_NODCE(rdev)) {
5881                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5882                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5883                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5884                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5885                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5886                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5887         }
5888
5889         WREG32(CG_THERMAL_INT, thermal_int);
5890
5891         return 0;
5892 }
5893
5894 static inline void si_irq_ack(struct radeon_device *rdev)
5895 {
5896         u32 tmp;
5897
5898         if (ASIC_IS_NODCE(rdev))
5899                 return;
5900
5901         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5902         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5903         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5904         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5905         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5906         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5907         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5908         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5909         if (rdev->num_crtc >= 4) {
5910                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5911                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5912         }
5913         if (rdev->num_crtc >= 6) {
5914                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5915                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5916         }
5917
5918         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5919                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5920         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5921                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5922         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5923                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5924         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5925                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5926         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5927                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5928         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5929                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5930
5931         if (rdev->num_crtc >= 4) {
5932                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5933                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5934                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5935                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5936                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5937                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5938                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5939                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5940                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5941                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5942                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5943                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5944         }
5945
5946         if (rdev->num_crtc >= 6) {
5947                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5948                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5949                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5950                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5951                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5952                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5953                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5954                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5955                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5956                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5957                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5958                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5959         }
5960
5961         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5962                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5963                 tmp |= DC_HPDx_INT_ACK;
5964                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5965         }
5966         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5967                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5968                 tmp |= DC_HPDx_INT_ACK;
5969                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5970         }
5971         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5972                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5973                 tmp |= DC_HPDx_INT_ACK;
5974                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5975         }
5976         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5977                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5978                 tmp |= DC_HPDx_INT_ACK;
5979                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5980         }
5981         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5982                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5983                 tmp |= DC_HPDx_INT_ACK;
5984                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5985         }
5986         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5987                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5988                 tmp |= DC_HPDx_INT_ACK;
5989                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5990         }
5991 }
5992
/**
 * si_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Shuts off interrupt delivery, acks anything still pending after a
 * short settle delay, and resets the interrupt enable registers to
 * their quiescent defaults.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
        si_disable_interrupts(rdev);
        /* Wait and acknowledge irq */
        mdelay(1);
        si_irq_ack(rdev);
        si_disable_interrupt_state(rdev);
}
6001
/**
 * si_irq_suspend - disable interrupt handling for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupts and stops the RLC so the hardware is quiet
 * before the device is suspended.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
        si_irq_disable(rdev);
        si_rlc_stop(rdev);
}
6007
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the interrupt hardware (as for suspend) and frees the
 * IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
        si_irq_suspend(rdev);
        r600_ih_ring_fini(rdev);
}
6013
6014 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6015 {
6016         u32 wptr, tmp;
6017
6018         if (rdev->wb.enabled)
6019                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6020         else
6021                 wptr = RREG32(IH_RB_WPTR);
6022
6023         if (wptr & RB_OVERFLOW) {
6024                 /* When a ring buffer overflow happen start parsing interrupt
6025                  * from the last not overwritten vector (wptr + 16). Hopefully
6026                  * this should allow us to catchup.
6027                  */
6028                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6029                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6030                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6031                 tmp = RREG32(IH_RB_CNTL);
6032                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6033                 WREG32(IH_RB_CNTL, tmp);
6034         }
6035         return (wptr & rdev->ih.ptr_mask);
6036 }
6037
6038 /*        SI IV Ring
6039  * Each IV ring entry is 128 bits:
6040  * [7:0]    - interrupt source id
6041  * [31:8]   - reserved
6042  * [59:32]  - interrupt source data
6043  * [63:60]  - reserved
6044  * [71:64]  - RINGID
6045  * [79:72]  - VMID
6046  * [127:80] - reserved
6047  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring and dispatches each 128-bit interrupt vector (see
 * the IV ring layout comment above) by source id: display
 * vblank/vline (1-6), hotplug (42), VM protection faults (146/147),
 * CP ring interrupts (176-178, 181), DMA traps (224, 244) and thermal
 * events (230/231).  Hotplug and thermal work is deferred to the
 * driver taskqueue.  After draining, re-reads the write pointer and
 * restarts if more vectors arrived while processing.  Returns
 * IRQ_NONE when the IH is disabled, the device is shutting down, or
 * another thread already holds the IH lock; IRQ_HANDLED otherwise.
 */
irqreturn_t si_irq_process(struct radeon_device *rdev)
{
        u32 wptr;
        u32 rptr;
        u32 src_id, src_data, ring_id;
        u32 ring_index;
        bool queue_hotplug = false;
        bool queue_thermal = false;
        u32 status, addr;

        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;

        wptr = si_get_ih_wptr(rdev);

restart_ih:
        /* is somebody else already processing irqs? */
        if (atomic_xchg(&rdev->ih.lock, 1))
                return IRQ_NONE;

        rptr = rdev->ih.rptr;
        DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

        /* Order reading of wptr vs. reading of IH ring data */
        rmb();

        /* display interrupts */
        si_irq_ack(rdev);

        while (rptr != wptr) {
                /* wptr/rptr are in bytes! */
                ring_index = rptr / 4;
                src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
                src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
                ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

                switch (src_id) {
                case 1: /* D1 vblank/vline */
                        switch (src_data) {
                        case 0: /* D1 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[0]) {
                                                drm_handle_vblank(rdev->ddev, 0);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[0]))
                                                radeon_crtc_handle_flip(rdev, 0);
                                        rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D1 vblank\n");
                                }
                                break;
                        case 1: /* D1 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D1 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 2: /* D2 vblank/vline */
                        switch (src_data) {
                        case 0: /* D2 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[1]) {
                                                drm_handle_vblank(rdev->ddev, 1);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[1]))
                                                radeon_crtc_handle_flip(rdev, 1);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D2 vblank\n");
                                }
                                break;
                        case 1: /* D2 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D2 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 3: /* D3 vblank/vline */
                        switch (src_data) {
                        case 0: /* D3 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[2]) {
                                                drm_handle_vblank(rdev->ddev, 2);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[2]))
                                                radeon_crtc_handle_flip(rdev, 2);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D3 vblank\n");
                                }
                                break;
                        case 1: /* D3 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D3 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 4: /* D4 vblank/vline */
                        switch (src_data) {
                        case 0: /* D4 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[3]) {
                                                drm_handle_vblank(rdev->ddev, 3);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[3]))
                                                radeon_crtc_handle_flip(rdev, 3);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D4 vblank\n");
                                }
                                break;
                        case 1: /* D4 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D4 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 5: /* D5 vblank/vline */
                        switch (src_data) {
                        case 0: /* D5 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[4]) {
                                                drm_handle_vblank(rdev->ddev, 4);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[4]))
                                                radeon_crtc_handle_flip(rdev, 4);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D5 vblank\n");
                                }
                                break;
                        case 1: /* D5 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D5 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 6: /* D6 vblank/vline */
                        switch (src_data) {
                        case 0: /* D6 vblank */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
                                        if (rdev->irq.crtc_vblank_int[5]) {
                                                drm_handle_vblank(rdev->ddev, 5);
                                                rdev->pm.vblank_sync = true;
                                                wake_up(&rdev->irq.vblank_queue);
                                        }
                                        if (atomic_read(&rdev->irq.pflip[5]))
                                                radeon_crtc_handle_flip(rdev, 5);
                                        rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
                                        DRM_DEBUG("IH: D6 vblank\n");
                                }
                                break;
                        case 1: /* D6 vline */
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
                                        DRM_DEBUG("IH: D6 vline\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 42: /* HPD hotplug */
                        switch (src_data) {
                        case 0:
                                if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD1\n");
                                }
                                break;
                        case 1:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD2\n");
                                }
                                break;
                        case 2:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD3\n");
                                }
                                break;
                        case 3:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD4\n");
                                }
                                break;
                        case 4:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD5\n");
                                }
                                break;
                        case 5:
                                if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
                                        rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
                                        queue_hotplug = true;
                                        DRM_DEBUG("IH: HPD6\n");
                                }
                                break;
                        default:
                                DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                                break;
                        }
                        break;
                case 146: /* GPU VM protection fault */
                case 147:
                        addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
                        status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                                addr);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                                status);
                        si_vm_decode_fault(rdev, status, addr);
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        break;
                case 176: /* RINGID0 CP_INT */
                        radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                        break;
                case 177: /* RINGID1 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                        break;
                case 178: /* RINGID2 CP_INT */
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                        break;
                case 181: /* CP EOP event */
                        DRM_DEBUG("IH: CP EOP\n");
                        switch (ring_id) {
                        case 0:
                                radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
                                break;
                        case 1:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
                                break;
                        case 2:
                                radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
                                break;
                        }
                        break;
                case 224: /* DMA trap event */
                        DRM_DEBUG("IH: DMA trap\n");
                        radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
                        break;
                case 230: /* thermal low to high */
                        DRM_DEBUG("IH: thermal low to high\n");
                        rdev->pm.dpm.thermal.high_to_low = false;
                        queue_thermal = true;
                        break;
                case 231: /* thermal high to low */
                        DRM_DEBUG("IH: thermal high to low\n");
                        rdev->pm.dpm.thermal.high_to_low = true;
                        queue_thermal = true;
                        break;
                case 233: /* GUI IDLE */
                        DRM_DEBUG("IH: GUI idle\n");
                        break;
                case 244: /* DMA1 trap event */
                        DRM_DEBUG("IH: DMA1 trap\n");
                        radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
                        break;
                default:
                        DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
                        break;
                }

                /* wptr/rptr are in bytes! */
                rptr += 16;
                rptr &= rdev->ih.ptr_mask;
        }
        /* defer hotplug/thermal handling to process context */
        if (queue_hotplug)
                taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
        if (queue_thermal && rdev->pm.dpm_enabled)
                taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
        rdev->ih.rptr = rptr;
        WREG32(IH_RB_RPTR, rdev->ih.rptr);
        atomic_set(&rdev->ih.lock, 0);

        /* make sure wptr hasn't changed while processing */
        wptr = si_get_ih_wptr(rdev);
        if (wptr != rptr)
                goto restart_ih;

        return IRQ_HANDLED;
}
6371
6372 /*
6373  * startup/shutdown callbacks
6374  */
/**
 * si_startup - bring the SI GPU up into an operational state
 *
 * @rdev: radeon_device pointer
 *
 * Runs the full hardware bring-up sequence: PCIe link/ASPM setup, VRAM
 * scratch, MC programming, microcode load, GART, RLC, writeback, fence
 * rings, IRQs, CP/DMA/UVD ring init, IB pool, VM manager and audio.
 * Called both from si_init() and si_resume().  The ordering of the steps
 * below is significant (e.g. scratch before MC, MC ucode before GART);
 * do not reorder.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load any firmware images that were not fetched yet */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		/* only Verde has a save/restore register list to program */
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence handling on each ring: GFX, CP1/CP2, DMA0/DMA1 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	if (rdev->has_uvd) {
		/* UVD is optional: on failure just disable its ring and
		 * continue, the rest of the GPU is still usable */
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* init the ring buffers: three CP rings, then the two DMA rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		/* ring_size == 0 means UVD was disabled above; skip it */
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6566
6567 int si_resume(struct radeon_device *rdev)
6568 {
6569         int r;
6570
6571         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6572          * posting will perform necessary task to bring back GPU into good
6573          * shape.
6574          */
6575         /* post card */
6576         atom_asic_init(rdev->mode_info.atom_context);
6577
6578         /* init golden registers */
6579         si_init_golden_registers(rdev);
6580
6581         rdev->accel_working = true;
6582         r = si_startup(rdev);
6583         if (r) {
6584                 DRM_ERROR("si startup failed on resume\n");
6585                 rdev->accel_working = false;
6586                 return r;
6587         }
6588
6589         return r;
6590
6591 }
6592
/**
 * si_suspend - quiesce the GPU for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops the engines and disables the hardware blocks brought up by
 * si_startup(), roughly in reverse bring-up order; the ordering is
 * significant, do not reorder.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processor and both DMA engines first */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	/* tear down powergating/clockgating before cutting IRQs and GART */
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6610
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init() does little more
 * than call the ASIC-specific function.  This should also allow
 * the removal of a number of callbacks, such as vram_info.
 */
/**
 * si_init - one-time driver initialization for SI GPUs
 *
 * @rdev: radeon_device pointer
 *
 * Fetches and validates the (ATOM) video BIOS, posts the card if needed,
 * sets up clocks, fences, the memory controller, buffer objects and all
 * ring descriptors, then calls si_startup() to bring the hardware up.
 * A startup failure disables acceleration but is not fatal here; a
 * missing MC microcode image is.
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up the ring descriptors; the buffer objects themselves are
	 * allocated later in si_startup() via radeon_ring_init() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* acceleration failed: unwind everything si_startup() may
		 * have set up, but keep the device alive for modesetting */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
6731
/**
 * si_fini - final driver teardown for SI GPUs
 *
 * @rdev: radeon_device pointer
 *
 * Releases every resource acquired in si_init()/si_startup(): engines,
 * gating, IRQs, RLC, writeback, VM manager, IB pool, UVD, GART, scratch,
 * GEM, fences, buffer objects, atombios state, microcode and the cached
 * BIOS image.  The ordering is significant, do not reorder.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	kfree(rdev->bios);
	/* clear the pointer so a stray later access cannot double-free */
	rdev->bios = NULL;
}
6758
6759 /**
6760  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6761  *
6762  * @rdev: radeon_device pointer
6763  *
6764  * Fetches a GPU clock counter snapshot (SI).
6765  * Returns the 64 bit clock counter snapshot.
6766  */
6767 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6768 {
6769         uint64_t clock;
6770
6771         spin_lock(&rdev->gpu_clock_mutex);
6772         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6773         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6774                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6775         spin_unlock(&rdev->gpu_clock_mutex);
6776         return clock;
6777 }
6778
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (kHz); 0 together with @dclk == 0
 *        puts the PLL to sleep in bypass mode
 * @dclk: requested UVD decode clock (kHz)
 *
 * Switches VCLK/DCLK to bypass, computes PLL dividers, then walks the
 * documented UPLL programming sequence (reset, divider setup, settle
 * delays, un-bypass).  The exact register ordering and mdelay()s are
 * part of the hardware sequence; do not reorder.
 *
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* derive feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 selection depends on the feedback divider magnitude */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6869
6870 static struct pci_dev dev_to_pcidev(device_t dev)
6871 {
6872     struct pci_dev pdev;
6873     pdev.dev = dev;
6874     return pdev;
6875 }
6876
/**
 * si_pcie_gen3_enable - attempt to train the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If the platform and GPU both advertise 5.0 or 8.0 GT/s support, runs
 * the link retraining sequence: for gen3 it first performs the
 * equalization retry dance between the upstream bridge and the GPU
 * (LNKCTL/LNKCTL2 save-restore around LC_REDO_EQ), then requests the
 * target link speed and kicks off the speed change.  No-op for IGP or
 * non-PCIE parts, or when disabled via radeon.pcie_gen2=0.
 *
 * NOTE(review): this is the DragonFly port — struct pci_dev wrappers are
 * built on the stack from newbus device_t handles rather than taken from
 * a PCI bus topology as in Linux.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
#if 0
	struct pci_dev *root = rdev->pdev->bus->self;
#else
	/* DragonFly: the upstream bridge is simply our newbus parent */
	device_t root = device_get_parent(rdev->dev);
#endif
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;
	struct pci_dev root_pdev = dev_to_pcidev(root);
	struct pci_dev pdev = dev_to_pcidev(rdev->dev);

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if the platform caps out at 2.5 GT/s */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability blocks in config space */
	bridge_pos = pci_get_pciecap_ptr(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save bridge/GPU link control so it can be
			 * restored after each equalization attempt */
			pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back up before retraining if it
			 * negotiated down and renegotiation is supported */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2[3:0] */
	pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the speed-change request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7039
7040 static void si_program_aspm(struct radeon_device *rdev)
7041 {
7042         u32 data, orig;
7043         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7044         bool disable_clkreq = false;
7045
7046         if (radeon_aspm == 0)
7047                 return;
7048
7049         if (!(rdev->flags & RADEON_IS_PCIE))
7050                 return;
7051
7052         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7053         data &= ~LC_XMIT_N_FTS_MASK;
7054         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7055         if (orig != data)
7056                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7057
7058         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7059         data |= LC_GO_TO_RECOVERY;
7060         if (orig != data)
7061                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7062
7063         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7064         data |= P_IGNORE_EDB_ERR;
7065         if (orig != data)
7066                 WREG32_PCIE(PCIE_P_CNTL, data);
7067
7068         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7069         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7070         data |= LC_PMI_TO_L1_DIS;
7071         if (!disable_l0s)
7072                 data |= LC_L0S_INACTIVITY(7);
7073
7074         if (!disable_l1) {
7075                 data |= LC_L1_INACTIVITY(7);
7076                 data &= ~LC_PMI_TO_L1_DIS;
7077                 if (orig != data)
7078                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7079
7080                 if (!disable_plloff_in_l1) {
7081                         bool clk_req_support;
7082
7083                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7084                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7085                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7086                         if (orig != data)
7087                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7088
7089                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7090                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7091                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7092                         if (orig != data)
7093                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7094
7095                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7096                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7097                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7098                         if (orig != data)
7099                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7100
7101                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7102                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7103                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7104                         if (orig != data)
7105                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7106
7107                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7108                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7109                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7110                                 if (orig != data)
7111                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7112
7113                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7114                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7115                                 if (orig != data)
7116                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7117
7118                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7119                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7120                                 if (orig != data)
7121                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7122
7123                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7124                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7125                                 if (orig != data)
7126                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7127
7128                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7129                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7130                                 if (orig != data)
7131                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7132
7133                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7134                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7135                                 if (orig != data)
7136                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7137
7138                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7139                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7140                                 if (orig != data)
7141                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7142
7143                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7144                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7145                                 if (orig != data)
7146                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7147                         }
7148                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7149                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7150                         data |= LC_DYN_LANES_PWR_STATE(3);
7151                         if (orig != data)
7152                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7153
7154                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7155                         data &= ~LS2_EXIT_TIME_MASK;
7156                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7157                                 data |= LS2_EXIT_TIME(5);
7158                         if (orig != data)
7159                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7160
7161                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7162                         data &= ~LS2_EXIT_TIME_MASK;
7163                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7164                                 data |= LS2_EXIT_TIME(5);
7165                         if (orig != data)
7166                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7167
7168                         if (!disable_clkreq) {
7169 #ifdef zMN_TODO
7170                                 struct pci_dev *root = rdev->pdev->bus->self;
7171                                 u32 lnkcap;
7172
7173                                 clk_req_support = false;
7174                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7175                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7176                                         clk_req_support = true;
7177 #else
7178                                 clk_req_support = false;
7179 #endif
7180                         } else {
7181                                 clk_req_support = false;
7182                         }
7183
7184                         if (clk_req_support) {
7185                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7186                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7187                                 if (orig != data)
7188                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7189
7190                                 orig = data = RREG32(THM_CLK_CNTL);
7191                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7192                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7193                                 if (orig != data)
7194                                         WREG32(THM_CLK_CNTL, data);
7195
7196                                 orig = data = RREG32(MISC_CLK_CNTL);
7197                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7198                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7199                                 if (orig != data)
7200                                         WREG32(MISC_CLK_CNTL, data);
7201
7202                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7203                                 data &= ~BCLK_AS_XCLK;
7204                                 if (orig != data)
7205                                         WREG32(CG_CLKPIN_CNTL, data);
7206
7207                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7208                                 data &= ~FORCE_BIF_REFCLK_EN;
7209                                 if (orig != data)
7210                                         WREG32(CG_CLKPIN_CNTL_2, data);
7211
7212                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7213                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7214                                 data |= MPLL_CLKOUT_SEL(4);
7215                                 if (orig != data)
7216                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7217
7218                                 orig = data = RREG32(SPLL_CNTL_MODE);
7219                                 data &= ~SPLL_REFCLK_SEL_MASK;
7220                                 if (orig != data)
7221                                         WREG32(SPLL_CNTL_MODE, data);
7222                         }
7223                 }
7224         } else {
7225                 if (orig != data)
7226                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7227         }
7228
7229         orig = data = RREG32_PCIE(PCIE_CNTL2);
7230         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7231         if (orig != data)
7232                 WREG32_PCIE(PCIE_CNTL2, data);
7233
7234         if (!disable_l0s) {
7235                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7236                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7237                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7238                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7239                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7240                                 data &= ~LC_L0S_INACTIVITY_MASK;
7241                                 if (orig != data)
7242                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7243                         }
7244                 }
7245         }
7246 }