/* drm/radeon: Sync to Linux 3.11
 * [dragonfly.git] sys/dev/drm/radeon/si.c */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  *
24  * $FreeBSD: head/sys/dev/drm2/radeon/si.c 254885 2013-08-25 19:37:15Z dumbbell $
25  */
26
27 #include <drm/drmP.h>
28 #include <linux/firmware.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <uapi_drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
/*
 * Compatibility shims: map the Linux PCI Express config-space symbol names
 * used by this (Linux-derived) driver onto the DragonFly/FreeBSD PCIER_*/
 * PCIEM_* equivalents, or onto raw capability offsets/bits where no named
 * equivalent is used.
 */
#define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */
/* Link Control 2 register lives at offset 48 (0x30) into the PCIe
 * capability.  NOTE(review): raw offset, no PCIER_* name - verify against
 * the platform's pcireg.h. */
#define PCI_EXP_LNKCTL2 48
#define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */
#define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */
#define PCI_EXP_DEVSTA_TRPND 0x0020 /* transactions-pending bit */
#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* clock power mgmt capable bit */
45
46 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
53 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
54 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
56 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
57 MODULE_FIRMWARE("radeon/VERDE_me.bin");
58 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
59 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
60 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
61 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
63 MODULE_FIRMWARE("radeon/OLAND_me.bin");
64 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
65 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
66 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
67 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
68 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
69 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
70 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
71 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
72 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
73 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
74
/* PCIe gen3 link-speed bring-up; defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
/* ASPM (Active State Power Management) programming; defined later. */
static void si_program_aspm(struct radeon_device *rdev);
/* Debug helper shared from evergreen code: dump GPU status registers. */
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
78
79 static const u32 verde_rlc_save_restore_register_list[] =
80 {
81         (0x8000 << 16) | (0x98f4 >> 2),
82         0x00000000,
83         (0x8040 << 16) | (0x98f4 >> 2),
84         0x00000000,
85         (0x8000 << 16) | (0xe80 >> 2),
86         0x00000000,
87         (0x8040 << 16) | (0xe80 >> 2),
88         0x00000000,
89         (0x8000 << 16) | (0x89bc >> 2),
90         0x00000000,
91         (0x8040 << 16) | (0x89bc >> 2),
92         0x00000000,
93         (0x8000 << 16) | (0x8c1c >> 2),
94         0x00000000,
95         (0x8040 << 16) | (0x8c1c >> 2),
96         0x00000000,
97         (0x9c00 << 16) | (0x98f0 >> 2),
98         0x00000000,
99         (0x9c00 << 16) | (0xe7c >> 2),
100         0x00000000,
101         (0x8000 << 16) | (0x9148 >> 2),
102         0x00000000,
103         (0x8040 << 16) | (0x9148 >> 2),
104         0x00000000,
105         (0x9c00 << 16) | (0x9150 >> 2),
106         0x00000000,
107         (0x9c00 << 16) | (0x897c >> 2),
108         0x00000000,
109         (0x9c00 << 16) | (0x8d8c >> 2),
110         0x00000000,
111         (0x9c00 << 16) | (0xac54 >> 2),
112         0X00000000,
113         0x3,
114         (0x9c00 << 16) | (0x98f8 >> 2),
115         0x00000000,
116         (0x9c00 << 16) | (0x9910 >> 2),
117         0x00000000,
118         (0x9c00 << 16) | (0x9914 >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x9918 >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0x991c >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0x9920 >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0x9924 >> 2),
127         0x00000000,
128         (0x9c00 << 16) | (0x9928 >> 2),
129         0x00000000,
130         (0x9c00 << 16) | (0x992c >> 2),
131         0x00000000,
132         (0x9c00 << 16) | (0x9930 >> 2),
133         0x00000000,
134         (0x9c00 << 16) | (0x9934 >> 2),
135         0x00000000,
136         (0x9c00 << 16) | (0x9938 >> 2),
137         0x00000000,
138         (0x9c00 << 16) | (0x993c >> 2),
139         0x00000000,
140         (0x9c00 << 16) | (0x9940 >> 2),
141         0x00000000,
142         (0x9c00 << 16) | (0x9944 >> 2),
143         0x00000000,
144         (0x9c00 << 16) | (0x9948 >> 2),
145         0x00000000,
146         (0x9c00 << 16) | (0x994c >> 2),
147         0x00000000,
148         (0x9c00 << 16) | (0x9950 >> 2),
149         0x00000000,
150         (0x9c00 << 16) | (0x9954 >> 2),
151         0x00000000,
152         (0x9c00 << 16) | (0x9958 >> 2),
153         0x00000000,
154         (0x9c00 << 16) | (0x995c >> 2),
155         0x00000000,
156         (0x9c00 << 16) | (0x9960 >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x9964 >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x9968 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0x996c >> 2),
163         0x00000000,
164         (0x9c00 << 16) | (0x9970 >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9974 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9978 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x997c >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9980 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9984 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9988 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x998c >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x8c00 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x8c14 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x8c04 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x8c08 >> 2),
187         0x00000000,
188         (0x8000 << 16) | (0x9b7c >> 2),
189         0x00000000,
190         (0x8040 << 16) | (0x9b7c >> 2),
191         0x00000000,
192         (0x8000 << 16) | (0xe84 >> 2),
193         0x00000000,
194         (0x8040 << 16) | (0xe84 >> 2),
195         0x00000000,
196         (0x8000 << 16) | (0x89c0 >> 2),
197         0x00000000,
198         (0x8040 << 16) | (0x89c0 >> 2),
199         0x00000000,
200         (0x8000 << 16) | (0x914c >> 2),
201         0x00000000,
202         (0x8040 << 16) | (0x914c >> 2),
203         0x00000000,
204         (0x8000 << 16) | (0x8c20 >> 2),
205         0x00000000,
206         (0x8040 << 16) | (0x8c20 >> 2),
207         0x00000000,
208         (0x8000 << 16) | (0x9354 >> 2),
209         0x00000000,
210         (0x8040 << 16) | (0x9354 >> 2),
211         0x00000000,
212         (0x9c00 << 16) | (0x9060 >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9364 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9100 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x913c >> 2),
219         0x00000000,
220         (0x8000 << 16) | (0x90e0 >> 2),
221         0x00000000,
222         (0x8000 << 16) | (0x90e4 >> 2),
223         0x00000000,
224         (0x8000 << 16) | (0x90e8 >> 2),
225         0x00000000,
226         (0x8040 << 16) | (0x90e0 >> 2),
227         0x00000000,
228         (0x8040 << 16) | (0x90e4 >> 2),
229         0x00000000,
230         (0x8040 << 16) | (0x90e8 >> 2),
231         0x00000000,
232         (0x9c00 << 16) | (0x8bcc >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x8b24 >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x88c4 >> 2),
237         0x00000000,
238         (0x9c00 << 16) | (0x8e50 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8c0c >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8e58 >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x8e5c >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x9508 >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x950c >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x9494 >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0xac0c >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0xac10 >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0xac14 >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0xae00 >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0xac08 >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0x88d4 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0x88c8 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0x88cc >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x89b0 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x8b10 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x8a14 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x9830 >> 2),
275         0x00000000,
276         (0x9c00 << 16) | (0x9834 >> 2),
277         0x00000000,
278         (0x9c00 << 16) | (0x9838 >> 2),
279         0x00000000,
280         (0x9c00 << 16) | (0x9a10 >> 2),
281         0x00000000,
282         (0x8000 << 16) | (0x9870 >> 2),
283         0x00000000,
284         (0x8000 << 16) | (0x9874 >> 2),
285         0x00000000,
286         (0x8001 << 16) | (0x9870 >> 2),
287         0x00000000,
288         (0x8001 << 16) | (0x9874 >> 2),
289         0x00000000,
290         (0x8040 << 16) | (0x9870 >> 2),
291         0x00000000,
292         (0x8040 << 16) | (0x9874 >> 2),
293         0x00000000,
294         (0x8041 << 16) | (0x9870 >> 2),
295         0x00000000,
296         (0x8041 << 16) | (0x9874 >> 2),
297         0x00000000,
298         0x00000000
299 };
300
/*
 * Tahiti "golden" RLC register settings - triplets of
 * { reg byte offset, and_mask, or_mask }.  Presumably applied by the
 * shared register-sequence programming helper at ASIC init (consumer is
 * not visible in this chunk - confirm); a mask of 0xffffffff conventionally
 * means "write or_mask verbatim".
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
310
/*
 * Tahiti "golden" register fixups - { offset, and_mask, or_mask } triplets
 * (presumably applied at init by the shared register-sequence helper;
 * consumer not in view).
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
345
/* Second Tahiti golden-register table, same { offset, and_mask, or_mask }
 * triplet format. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
350
/* Pitcairn golden RLC settings - { offset, and_mask, or_mask } triplets. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
359
/* Pitcairn golden register fixups - { offset, and_mask, or_mask } triplets. */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
390
/* Verde golden RLC settings - { offset, and_mask, or_mask } triplets. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
399
/*
 * Verde golden register fixups - { offset, and_mask, or_mask } triplets.
 * NOTE(review): several triplets appear two or three times (0xd030, 0xd830,
 * 0x2ae4, 0x240c, 0x8a14, 0x28350, 0x9100, 0x8e88, 0x8e84, 0xac14, 0xac10,
 * 0xac0c, 0x88d4).  This matches the imported Linux 3.11 table verbatim and
 * is harmless (re-writing the same masked value), so it is kept as-is.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
455
/* Oland golden RLC settings - { offset, and_mask, or_mask } triplets. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
464
/* Oland golden register fixups - { offset, and_mask, or_mask } triplets. */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
495
/* Hainan golden register fixups - { offset, and_mask, or_mask } triplets.
 * (No display-block entries here; Hainan has no display engine.) */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
524
/* Second Hainan golden-register table, same triplet format. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
529
/*
 * Tahiti clock-gating init sequence (per the name, MGCG/CGCG - medium- and
 * coarse-grain clock gating) - { offset, and_mask, or_mask } triplets.
 * NOTE(review): consumer not visible in this chunk - confirm it is applied
 * by the same register-sequence helper as the golden tables.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
659
/*
 * Pitcairn clock-gating (MGCG/CGCG, per the name) init sequence -
 * { offset, and_mask, or_mask } triplets.  Shorter than the Tahiti table
 * (fewer 0x91xx entries, no 0x264c/0x2648), consistent with the smaller
 * ASIC configuration.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
757
/*
 * Verde clock-gating (MGCG/CGCG, per the name) init sequence -
 * { offset, and_mask, or_mask } triplets.  Same shape as the Pitcairn
 * table plus the 0x264c/0x2648 entries.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
857
/*
 * Oland clock-gating (MGCG/CGCG) init sequence.
 * Flat list of {register offset, mask, value} triples, programmed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
937
/*
 * Hainan clock-gating (MGCG/CGCG) init sequence.
 * Flat list of {register offset, mask, value} triples, programmed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1014
1015 static u32 verde_pg_init[] =
1016 {
1017         0x353c, 0xffffffff, 0x40000,
1018         0x3538, 0xffffffff, 0x200010ff,
1019         0x353c, 0xffffffff, 0x0,
1020         0x353c, 0xffffffff, 0x0,
1021         0x353c, 0xffffffff, 0x0,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x7007,
1025         0x3538, 0xffffffff, 0x300010ff,
1026         0x353c, 0xffffffff, 0x0,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x400000,
1032         0x3538, 0xffffffff, 0x100010ff,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x120200,
1039         0x3538, 0xffffffff, 0x500010ff,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x1e1e16,
1046         0x3538, 0xffffffff, 0x600010ff,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x171f1e,
1053         0x3538, 0xffffffff, 0x700010ff,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x3538, 0xffffffff, 0x9ff,
1061         0x3500, 0xffffffff, 0x0,
1062         0x3504, 0xffffffff, 0x10000800,
1063         0x3504, 0xffffffff, 0xf,
1064         0x3504, 0xffffffff, 0xf,
1065         0x3500, 0xffffffff, 0x4,
1066         0x3504, 0xffffffff, 0x1000051e,
1067         0x3504, 0xffffffff, 0xffff,
1068         0x3504, 0xffffffff, 0xffff,
1069         0x3500, 0xffffffff, 0x8,
1070         0x3504, 0xffffffff, 0x80500,
1071         0x3500, 0xffffffff, 0x12,
1072         0x3504, 0xffffffff, 0x9050c,
1073         0x3500, 0xffffffff, 0x1d,
1074         0x3504, 0xffffffff, 0xb052c,
1075         0x3500, 0xffffffff, 0x2a,
1076         0x3504, 0xffffffff, 0x1053e,
1077         0x3500, 0xffffffff, 0x2d,
1078         0x3504, 0xffffffff, 0x10546,
1079         0x3500, 0xffffffff, 0x30,
1080         0x3504, 0xffffffff, 0xa054e,
1081         0x3500, 0xffffffff, 0x3c,
1082         0x3504, 0xffffffff, 0x1055f,
1083         0x3500, 0xffffffff, 0x3f,
1084         0x3504, 0xffffffff, 0x10567,
1085         0x3500, 0xffffffff, 0x42,
1086         0x3504, 0xffffffff, 0x1056f,
1087         0x3500, 0xffffffff, 0x45,
1088         0x3504, 0xffffffff, 0x10572,
1089         0x3500, 0xffffffff, 0x48,
1090         0x3504, 0xffffffff, 0x20575,
1091         0x3500, 0xffffffff, 0x4c,
1092         0x3504, 0xffffffff, 0x190801,
1093         0x3500, 0xffffffff, 0x67,
1094         0x3504, 0xffffffff, 0x1082a,
1095         0x3500, 0xffffffff, 0x6a,
1096         0x3504, 0xffffffff, 0x1b082d,
1097         0x3500, 0xffffffff, 0x87,
1098         0x3504, 0xffffffff, 0x310851,
1099         0x3500, 0xffffffff, 0xba,
1100         0x3504, 0xffffffff, 0x891,
1101         0x3500, 0xffffffff, 0xbc,
1102         0x3504, 0xffffffff, 0x893,
1103         0x3500, 0xffffffff, 0xbe,
1104         0x3504, 0xffffffff, 0x20895,
1105         0x3500, 0xffffffff, 0xc2,
1106         0x3504, 0xffffffff, 0x20899,
1107         0x3500, 0xffffffff, 0xc6,
1108         0x3504, 0xffffffff, 0x2089d,
1109         0x3500, 0xffffffff, 0xca,
1110         0x3504, 0xffffffff, 0x8a1,
1111         0x3500, 0xffffffff, 0xcc,
1112         0x3504, 0xffffffff, 0x8a3,
1113         0x3500, 0xffffffff, 0xce,
1114         0x3504, 0xffffffff, 0x308a5,
1115         0x3500, 0xffffffff, 0xd3,
1116         0x3504, 0xffffffff, 0x6d08cd,
1117         0x3500, 0xffffffff, 0x142,
1118         0x3504, 0xffffffff, 0x2000095a,
1119         0x3504, 0xffffffff, 0x1,
1120         0x3500, 0xffffffff, 0x144,
1121         0x3504, 0xffffffff, 0x301f095b,
1122         0x3500, 0xffffffff, 0x165,
1123         0x3504, 0xffffffff, 0xc094d,
1124         0x3500, 0xffffffff, 0x173,
1125         0x3504, 0xffffffff, 0xf096d,
1126         0x3500, 0xffffffff, 0x184,
1127         0x3504, 0xffffffff, 0x15097f,
1128         0x3500, 0xffffffff, 0x19b,
1129         0x3504, 0xffffffff, 0xc0998,
1130         0x3500, 0xffffffff, 0x1a9,
1131         0x3504, 0xffffffff, 0x409a7,
1132         0x3500, 0xffffffff, 0x1af,
1133         0x3504, 0xffffffff, 0xcdc,
1134         0x3500, 0xffffffff, 0x1b1,
1135         0x3504, 0xffffffff, 0x800,
1136         0x3508, 0xffffffff, 0x6c9b2000,
1137         0x3510, 0xfc00, 0x2000,
1138         0x3544, 0xffffffff, 0xfc0,
1139         0x28d4, 0x00000100, 0x100
1140 };
1141
1142 static void si_init_golden_registers(struct radeon_device *rdev)
1143 {
1144         switch (rdev->family) {
1145         case CHIP_TAHITI:
1146                 radeon_program_register_sequence(rdev,
1147                                                  tahiti_golden_registers,
1148                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1149                 radeon_program_register_sequence(rdev,
1150                                                  tahiti_golden_rlc_registers,
1151                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1152                 radeon_program_register_sequence(rdev,
1153                                                  tahiti_mgcg_cgcg_init,
1154                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1155                 radeon_program_register_sequence(rdev,
1156                                                  tahiti_golden_registers2,
1157                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1158                 break;
1159         case CHIP_PITCAIRN:
1160                 radeon_program_register_sequence(rdev,
1161                                                  pitcairn_golden_registers,
1162                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1163                 radeon_program_register_sequence(rdev,
1164                                                  pitcairn_golden_rlc_registers,
1165                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1166                 radeon_program_register_sequence(rdev,
1167                                                  pitcairn_mgcg_cgcg_init,
1168                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1169                 break;
1170         case CHIP_VERDE:
1171                 radeon_program_register_sequence(rdev,
1172                                                  verde_golden_registers,
1173                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1174                 radeon_program_register_sequence(rdev,
1175                                                  verde_golden_rlc_registers,
1176                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1177                 radeon_program_register_sequence(rdev,
1178                                                  verde_mgcg_cgcg_init,
1179                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1180                 radeon_program_register_sequence(rdev,
1181                                                  verde_pg_init,
1182                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1183                 break;
1184         case CHIP_OLAND:
1185                 radeon_program_register_sequence(rdev,
1186                                                  oland_golden_registers,
1187                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1188                 radeon_program_register_sequence(rdev,
1189                                                  oland_golden_rlc_registers,
1190                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1191                 radeon_program_register_sequence(rdev,
1192                                                  oland_mgcg_cgcg_init,
1193                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1194                 break;
1195         case CHIP_HAINAN:
1196                 radeon_program_register_sequence(rdev,
1197                                                  hainan_golden_registers,
1198                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1199                 radeon_program_register_sequence(rdev,
1200                                                  hainan_golden_registers2,
1201                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1202                 radeon_program_register_sequence(rdev,
1203                                                  hainan_mgcg_cgcg_init,
1204                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1205                 break;
1206         default:
1207                 break;
1208         }
1209 }
1210
1211 #define PCIE_BUS_CLK                10000
1212 #define TCLK                        (PCIE_BUS_CLK / 10)
1213
1214 /**
1215  * si_get_xclk - get the xclk
1216  *
1217  * @rdev: radeon_device pointer
1218  *
1219  * Returns the reference clock used by the gfx engine
1220  * (SI).
1221  */
1222 u32 si_get_xclk(struct radeon_device *rdev)
1223 {
1224         u32 reference_clock = rdev->clock.spll.reference_freq;
1225         u32 tmp;
1226
1227         tmp = RREG32(CG_CLKPIN_CNTL_2);
1228         if (tmp & MUX_TCLK_TO_XCLK)
1229                 return TCLK;
1230
1231         tmp = RREG32(CG_CLKPIN_CNTL);
1232         if (tmp & XTALIN_DIVIDE)
1233                 return reference_clock / 4;
1234
1235         return reference_clock;
1236 }
1237
1238 /* get temperature in millidegrees */
1239 int si_get_temp(struct radeon_device *rdev)
1240 {
1241         u32 temp;
1242         int actual_temp = 0;
1243
1244         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1245                 CTF_TEMP_SHIFT;
1246
1247         if (temp & 0x200)
1248                 actual_temp = 255;
1249         else
1250                 actual_temp = temp & 0x1ff;
1251
1252         actual_temp = (actual_temp * 1000);
1253
1254         return actual_temp;
1255 }
1256
/* Number of {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs per table. */
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC IO debug overrides, written before loading the MC ucode
 * (see si_mc_load_microcode()).
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1297
/* Pitcairn MC IO debug overrides, written before loading the MC ucode
 * (see si_mc_load_microcode()).
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1336
/* Verde MC IO debug overrides, written before loading the MC ucode
 * (see si_mc_load_microcode()).
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1375
/* Oland MC IO debug overrides, written before loading the MC ucode
 * (see si_mc_load_microcode()).
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1414
/* Hainan MC IO debug overrides, written before loading the MC ucode
 * (see si_mc_load_microcode()).
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1453
1454 /* ucode loading */
1455 static int si_mc_load_microcode(struct radeon_device *rdev)
1456 {
1457         const __be32 *fw_data;
1458         u32 running, blackout = 0;
1459         u32 *io_mc_regs;
1460         int i, ucode_size, regs_size;
1461
1462         if (!rdev->mc_fw)
1463                 return -EINVAL;
1464
1465         switch (rdev->family) {
1466         case CHIP_TAHITI:
1467                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1468                 ucode_size = SI_MC_UCODE_SIZE;
1469                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1470                 break;
1471         case CHIP_PITCAIRN:
1472                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1473                 ucode_size = SI_MC_UCODE_SIZE;
1474                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1475                 break;
1476         case CHIP_VERDE:
1477         default:
1478                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1479                 ucode_size = SI_MC_UCODE_SIZE;
1480                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1481                 break;
1482         case CHIP_OLAND:
1483                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1484                 ucode_size = OLAND_MC_UCODE_SIZE;
1485                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1486                 break;
1487         case CHIP_HAINAN:
1488                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1489                 ucode_size = OLAND_MC_UCODE_SIZE;
1490                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1491                 break;
1492         }
1493
1494         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1495
1496         if (running == 0) {
1497                 if (running) {
1498                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1499                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1500                 }
1501
1502                 /* reset the engine and set to writable */
1503                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1504                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1505
1506                 /* load mc io regs */
1507                 for (i = 0; i < regs_size; i++) {
1508                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1509                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1510                 }
1511                 /* load the MC ucode */
1512                 fw_data = (const __be32 *)rdev->mc_fw->data;
1513                 for (i = 0; i < ucode_size; i++)
1514                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1515
1516                 /* put the engine back into the active state */
1517                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1518                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1519                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1520
1521                 /* wait for training to complete */
1522                 for (i = 0; i < rdev->usec_timeout; i++) {
1523                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1524                                 break;
1525                         DRM_UDELAY(1);
1526                 }
1527                 for (i = 0; i < rdev->usec_timeout; i++) {
1528                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1529                                 break;
1530                         DRM_UDELAY(1);
1531                 }
1532
1533                 if (running)
1534                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1535         }
1536
1537         return 0;
1538 }
1539
/**
 * si_init_microcode - fetch and validate the SI microcode images
 *
 * @rdev: radeon_device pointer
 *
 * Loads the PFP, ME, CE, RLC, MC and SMC firmware images for the
 * detected asic family and checks each blob against the expected
 * ucode size.  Returns 0 on success, -ENOENT when an image is
 * missing, -EINVAL when an image has a bogus size; on any failure
 * every reference taken so far is dropped before returning.
 */
static int si_init_microcode(struct radeon_device *rdev)
{
        const char *chip_name;
        const char *rlc_chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
        size_t smc_req_size;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        /* pick per-family firmware names and expected ucode sizes (bytes) */
        switch (rdev->family) {
        case CHIP_TAHITI:
                chip_name = "TAHITI";
                rlc_chip_name = "TAHITI";
                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
                me_req_size = SI_PM4_UCODE_SIZE * 4;
                ce_req_size = SI_CE_UCODE_SIZE * 4;
                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
                mc_req_size = SI_MC_UCODE_SIZE * 4;
                smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_PITCAIRN:
                chip_name = "PITCAIRN";
                rlc_chip_name = "PITCAIRN";
                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
                me_req_size = SI_PM4_UCODE_SIZE * 4;
                ce_req_size = SI_CE_UCODE_SIZE * 4;
                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
                mc_req_size = SI_MC_UCODE_SIZE * 4;
                smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_VERDE:
                chip_name = "VERDE";
                rlc_chip_name = "VERDE";
                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
                me_req_size = SI_PM4_UCODE_SIZE * 4;
                ce_req_size = SI_CE_UCODE_SIZE * 4;
                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
                mc_req_size = SI_MC_UCODE_SIZE * 4;
                smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_OLAND:
                chip_name = "OLAND";
                rlc_chip_name = "OLAND";
                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
                me_req_size = SI_PM4_UCODE_SIZE * 4;
                ce_req_size = SI_CE_UCODE_SIZE * 4;
                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
                mc_req_size = OLAND_MC_UCODE_SIZE * 4;
                smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
                break;
        case CHIP_HAINAN:
                chip_name = "HAINAN";
                rlc_chip_name = "HAINAN";
                pfp_req_size = SI_PFP_UCODE_SIZE * 4;
                me_req_size = SI_PM4_UCODE_SIZE * 4;
                ce_req_size = SI_CE_UCODE_SIZE * 4;
                rlc_req_size = SI_RLC_UCODE_SIZE * 4;
                mc_req_size = OLAND_MC_UCODE_SIZE * 4;   /* HAINAN shares the OLAND MC ucode size */
                smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
                break;
        default: panic("%s: Unsupported family %d", __func__, rdev->family);
        }

        DRM_INFO("Loading %s Microcode\n", chip_name);
        err = 0;

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
        rdev->pfp_fw = firmware_get(fw_name);
        if (rdev->pfp_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->pfp_fw->datasize != pfp_req_size) {
                DRM_ERROR(
                       "si_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->datasize, fw_name);
                err = -EINVAL;
                goto out;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
        rdev->me_fw = firmware_get(fw_name);
        if (rdev->me_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        /*
         * From here on a size mismatch records -EINVAL but deliberately
         * does not jump to out: the remaining images are still fetched
         * so every bogus blob gets reported; the err check at out still
         * fails the whole function.
         */
        if (rdev->me_fw->datasize != me_req_size) {
                DRM_ERROR(
                       "si_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
        rdev->ce_fw = firmware_get(fw_name);
        if (rdev->ce_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->ce_fw->datasize != ce_req_size) {
                DRM_ERROR(
                       "si_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->ce_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc",
                  rlc_chip_name);
        rdev->rlc_fw = firmware_get(fw_name);
        if (rdev->rlc_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->rlc_fw->datasize != rlc_req_size) {
                DRM_ERROR(
                       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->rlc_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
        rdev->mc_fw = firmware_get(fw_name);
        if (rdev->mc_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->mc_fw->datasize != mc_req_size) {
                DRM_ERROR(
                       "si_mc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->mc_fw->datasize, fw_name);
                err = -EINVAL;
        }

        /*
         * The SMC image goes through the Linux-style request_firmware()
         * compat wrapper rather than firmware_get().  NOTE(review): a
         * load failure leaves err set, so unlike later upstream kernels
         * a missing SMC image fails the whole function here.
         */
        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
        err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
        if (err) {
                printk(KERN_ERR
                       "smc: error loading firmware \"%s\"\n",
                       fw_name);
                release_firmware(rdev->smc_fw);
                rdev->smc_fw = NULL;
        } else if (rdev->smc_fw->datasize != smc_req_size) {
                DRM_ERROR(
                       "si_smc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->smc_fw->datasize, fw_name);
                err = -EINVAL;
        }

out:
        /* on any failure, drop every reference taken so far */
        if (err) {
                if (err != -EINVAL)
                        DRM_ERROR(
                               "si_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                if (rdev->pfp_fw != NULL) {
                        firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
                        rdev->pfp_fw = NULL;
                }
                if (rdev->me_fw != NULL) {
                        firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
                        rdev->me_fw = NULL;
                }
                if (rdev->ce_fw != NULL) {
                        firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
                        rdev->ce_fw = NULL;
                }
                if (rdev->rlc_fw != NULL) {
                        firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
                        rdev->rlc_fw = NULL;
                }
                if (rdev->mc_fw != NULL) {
                        firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
                        rdev->mc_fw = NULL;
                }
                if (rdev->smc_fw != NULL) {
                        firmware_put(rdev->smc_fw, FIRMWARE_UNLOAD);
                        rdev->smc_fw = NULL;
                }
        }
        return err;
}
1723
1724 /**
1725  * si_fini_microcode - drop the firmwares image references
1726  *
1727  * @rdev: radeon_device pointer
1728  *
1729  * Drop the pfp, me, rlc, mc and ce firmware image references.
1730  * Called at driver shutdown.
1731  */
1732 static void si_fini_microcode(struct radeon_device *rdev)
1733 {
1734
1735         if (rdev->pfp_fw != NULL) {
1736                 firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
1737                 rdev->pfp_fw = NULL;
1738         }
1739
1740         if (rdev->me_fw != NULL) {
1741                 firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
1742                 rdev->me_fw = NULL;
1743         }
1744
1745         if (rdev->rlc_fw != NULL) {
1746                 firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
1747                 rdev->rlc_fw = NULL;
1748         }
1749
1750         if (rdev->mc_fw != NULL) {
1751                 firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
1752                 rdev->mc_fw = NULL;
1753         }
1754
1755         if (rdev->smc_fw != NULL) {
1756                 firmware_put(rdev->smc_fw, FIRMWARE_UNLOAD);
1757                 rdev->smc_fw = NULL;
1758         }
1759
1760         if (rdev->ce_fw != NULL) {
1761                 firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
1762                 rdev->ce_fw = NULL;
1763         }
1764 }
1765
1766 /* watermark setup */
1767 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1768                                    struct radeon_crtc *radeon_crtc,
1769                                    struct drm_display_mode *mode,
1770                                    struct drm_display_mode *other_mode)
1771 {
1772         u32 tmp;
1773         /*
1774          * Line Buffer Setup
1775          * There are 3 line buffers, each one shared by 2 display controllers.
1776          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1777          * the display controllers.  The paritioning is done via one of four
1778          * preset allocations specified in bits 21:20:
1779          *  0 - half lb
1780          *  2 - whole lb, other crtc must be disabled
1781          */
1782         /* this can get tricky if we have two large displays on a paired group
1783          * of crtcs.  Ideally for multiple large displays we'd assign them to
1784          * non-linked crtcs for maximum line buffer allocation.
1785          */
1786         if (radeon_crtc->base.enabled && mode) {
1787                 if (other_mode)
1788                         tmp = 0; /* 1/2 */
1789                 else
1790                         tmp = 2; /* whole */
1791         } else
1792                 tmp = 0;
1793
1794         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1795                DC_LB_MEMORY_CONFIG(tmp));
1796
1797         if (radeon_crtc->base.enabled && mode) {
1798                 switch (tmp) {
1799                 case 0:
1800                 default:
1801                         return 4096 * 2;
1802                 case 2:
1803                         return 8192 * 2;
1804                 }
1805         }
1806
1807         /* controller not enabled, so no lb used */
1808         return 0;
1809 }
1810
1811 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1812 {
1813         u32 tmp = RREG32(MC_SHARED_CHMAP);
1814
1815         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1816         case 0:
1817         default:
1818                 return 1;
1819         case 1:
1820                 return 2;
1821         case 2:
1822                 return 4;
1823         case 3:
1824                 return 8;
1825         case 4:
1826                 return 3;
1827         case 5:
1828                 return 6;
1829         case 6:
1830                 return 10;
1831         case 7:
1832                 return 12;
1833         case 8:
1834                 return 16;
1835         }
1836 }
1837
/*
 * Inputs to the DCE6 display watermark calculations below; filled in
 * by dce6_program_watermarks() once per crtc and clock level.
 */
struct dce6_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
1853
1854 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1855 {
1856         /* Calculate raw DRAM Bandwidth */
1857         fixed20_12 dram_efficiency; /* 0.7 */
1858         fixed20_12 yclk, dram_channels, bandwidth;
1859         fixed20_12 a;
1860
1861         a.full = dfixed_const(1000);
1862         yclk.full = dfixed_const(wm->yclk);
1863         yclk.full = dfixed_div(yclk, a);
1864         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1865         a.full = dfixed_const(10);
1866         dram_efficiency.full = dfixed_const(7);
1867         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1868         bandwidth.full = dfixed_mul(dram_channels, yclk);
1869         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1870
1871         return dfixed_trunc(bandwidth);
1872 }
1873
1874 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1875 {
1876         /* Calculate DRAM Bandwidth and the part allocated to display. */
1877         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1878         fixed20_12 yclk, dram_channels, bandwidth;
1879         fixed20_12 a;
1880
1881         a.full = dfixed_const(1000);
1882         yclk.full = dfixed_const(wm->yclk);
1883         yclk.full = dfixed_div(yclk, a);
1884         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1885         a.full = dfixed_const(10);
1886         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1887         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1888         bandwidth.full = dfixed_mul(dram_channels, yclk);
1889         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1890
1891         return dfixed_trunc(bandwidth);
1892 }
1893
1894 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1895 {
1896         /* Calculate the display Data return Bandwidth */
1897         fixed20_12 return_efficiency; /* 0.8 */
1898         fixed20_12 sclk, bandwidth;
1899         fixed20_12 a;
1900
1901         a.full = dfixed_const(1000);
1902         sclk.full = dfixed_const(wm->sclk);
1903         sclk.full = dfixed_div(sclk, a);
1904         a.full = dfixed_const(10);
1905         return_efficiency.full = dfixed_const(8);
1906         return_efficiency.full = dfixed_div(return_efficiency, a);
1907         a.full = dfixed_const(32);
1908         bandwidth.full = dfixed_mul(a, sclk);
1909         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1910
1911         return dfixed_trunc(bandwidth);
1912 }
1913
/* DMIF requests are a fixed 32 bytes on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
        return 32;
}
1918
1919 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1920 {
1921         /* Calculate the DMIF Request Bandwidth */
1922         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1923         fixed20_12 disp_clk, sclk, bandwidth;
1924         fixed20_12 a, b1, b2;
1925         u32 min_bandwidth;
1926
1927         a.full = dfixed_const(1000);
1928         disp_clk.full = dfixed_const(wm->disp_clk);
1929         disp_clk.full = dfixed_div(disp_clk, a);
1930         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1931         b1.full = dfixed_mul(a, disp_clk);
1932
1933         a.full = dfixed_const(1000);
1934         sclk.full = dfixed_const(wm->sclk);
1935         sclk.full = dfixed_div(sclk, a);
1936         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1937         b2.full = dfixed_mul(a, sclk);
1938
1939         a.full = dfixed_const(10);
1940         disp_clk_request_efficiency.full = dfixed_const(8);
1941         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1942
1943         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1944
1945         a.full = dfixed_const(min_bandwidth);
1946         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1947
1948         return dfixed_trunc(bandwidth);
1949 }
1950
1951 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1952 {
1953         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1954         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1955         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1956         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1957
1958         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1959 }
1960
1961 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1962 {
1963         /* Calculate the display mode Average Bandwidth
1964          * DisplayMode should contain the source and destination dimensions,
1965          * timing, etc.
1966          */
1967         fixed20_12 bpp;
1968         fixed20_12 line_time;
1969         fixed20_12 src_width;
1970         fixed20_12 bandwidth;
1971         fixed20_12 a;
1972
1973         a.full = dfixed_const(1000);
1974         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1975         line_time.full = dfixed_div(line_time, a);
1976         bpp.full = dfixed_const(wm->bytes_per_pixel);
1977         src_width.full = dfixed_const(wm->src_width);
1978         bandwidth.full = dfixed_mul(src_width, bpp);
1979         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1980         bandwidth.full = dfixed_div(bandwidth, line_time);
1981
1982         return dfixed_trunc(bandwidth);
1983 }
1984
/**
 * dce6_latency_watermark - worst-case latency watermark in ns
 *
 * @wm: watermark parameters for this crtc/clock level
 *
 * Latency is the mc latency plus the dc pipe latency plus the time
 * the other active heads keep the data-return path busy.  When the
 * line buffer cannot be refilled within the active display time,
 * the shortfall is added on top.  Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
        /* First calculate the latency in ns */
        u32 mc_latency = 2000; /* 2000 ns. */
        u32 available_bandwidth = dce6_available_bandwidth(wm);
        u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
        u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
        u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
        u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
                (wm->num_heads * cursor_line_pair_return_time);
        u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
        u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
        u32 tmp, dmif_size = 12288;
        fixed20_12 a, b, c; /* reused scratch registers for the fixed-point math below */

        if (wm->num_heads == 0)
                return 0;

        /* heavy downscaling/interlacing may pull up to 4 source lines per output line */
        a.full = dfixed_const(2);
        b.full = dfixed_const(1);
        if ((wm->vsc.full > a.full) ||
            ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
            (wm->vtaps >= 5) ||
            ((wm->vsc.full >= a.full) && wm->interlaced))
                max_src_lines_per_dst_line = 4;
        else
                max_src_lines_per_dst_line = 2;

        /* per-head share of the available bandwidth */
        a.full = dfixed_const(available_bandwidth);
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);

        /* bandwidth limit implied by the dmif buffer draining within mc_latency+512 clocks */
        b.full = dfixed_const(mc_latency + 512);
        c.full = dfixed_const(wm->disp_clk);
        b.full = dfixed_div(b, c);

        c.full = dfixed_const(dmif_size);
        b.full = dfixed_div(c, b);

        tmp = min(dfixed_trunc(a), dfixed_trunc(b));

        /* cap by what the display clock can consume: disp_clk/1000 * bytes_per_pixel */
        b.full = dfixed_const(1000);
        c.full = dfixed_const(wm->disp_clk);
        b.full = dfixed_div(c, b);
        c.full = dfixed_const(wm->bytes_per_pixel);
        b.full = dfixed_mul(b, c);

        lb_fill_bw = min(tmp, dfixed_trunc(b));

        /* time to refill the line buffer at lb_fill_bw */
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);
        c.full = dfixed_const(lb_fill_bw);
        b.full = dfixed_div(c, b);
        a.full = dfixed_div(a, b);
        line_fill_time = dfixed_trunc(a);

        /* charge any refill time that does not fit in the active period */
        if (line_fill_time < wm->active_time)
                return latency;
        else
                return latency + (line_fill_time - wm->active_time);

}
2047
2048 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2049 {
2050         if (dce6_average_bandwidth(wm) <=
2051             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2052                 return true;
2053         else
2054                 return false;
2055 };
2056
2057 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2058 {
2059         if (dce6_average_bandwidth(wm) <=
2060             (dce6_available_bandwidth(wm) / wm->num_heads))
2061                 return true;
2062         else
2063                 return false;
2064 };
2065
2066 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2067 {
2068         u32 lb_partitions = wm->lb_size / wm->src_width;
2069         u32 line_time = wm->active_time + wm->blank_time;
2070         u32 latency_tolerant_lines;
2071         u32 latency_hiding;
2072         fixed20_12 a;
2073
2074         a.full = dfixed_const(1);
2075         if (wm->vsc.full > a.full)
2076                 latency_tolerant_lines = 1;
2077         else {
2078                 if (lb_partitions <= (wm->vtaps + 1))
2079                         latency_tolerant_lines = 1;
2080                 else
2081                         latency_tolerant_lines = 2;
2082         }
2083
2084         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2085
2086         if (dce6_latency_watermark(wm) <= latency_hiding)
2087                 return true;
2088         else
2089                 return false;
2090 }
2091
/**
 * dce6_program_watermarks - program the display watermarks for one crtc (DCE6)
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc to program
 * @lb_size: line buffer space allocated to this crtc
 * @num_heads: number of active display heads
 *
 * Computes latency watermarks for the high (wm A) and low (wm B)
 * clock states, forces display priority to high when the mode cannot
 * be sustained, then writes the watermark and priority mark registers
 * for this crtc and caches the results for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
                                         struct radeon_crtc *radeon_crtc,
                                         u32 lb_size, u32 num_heads)
{
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
        struct dce6_wm_params wm_low, wm_high;
        u32 dram_channels;
        u32 pixel_period;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
        u32 priority_a_mark = 0, priority_b_mark = 0;
        u32 priority_a_cnt = PRIORITY_OFF;
        u32 priority_b_cnt = PRIORITY_OFF;
        u32 tmp, arb_control3;
        fixed20_12 a, b, c;

        if (radeon_crtc->base.enabled && num_heads && mode) {
                /* pixel period in ns; line_time clamped to the 16-bit register field */
                pixel_period = 1000000 / (u32)mode->clock;
                line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
                priority_a_cnt = 0;
                priority_b_cnt = 0;

                if (rdev->family == CHIP_ARUBA)
                        dram_channels = evergreen_get_number_of_dram_channels(rdev);
                else
                        dram_channels = si_get_number_of_dram_channels(rdev);

                /* watermark for high clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                        wm_high.yclk =
                                radeon_dpm_get_mclk(rdev, false) * 10;
                        wm_high.sclk =
                                radeon_dpm_get_sclk(rdev, false) * 10;
                } else {
                        wm_high.yclk = rdev->pm.current_mclk * 10;
                        wm_high.sclk = rdev->pm.current_sclk * 10;
                }

                wm_high.disp_clk = mode->clock;
                wm_high.src_width = mode->crtc_hdisplay;
                wm_high.active_time = mode->crtc_hdisplay * pixel_period;
                wm_high.blank_time = line_time - wm_high.active_time;
                wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_high.interlaced = true;
                wm_high.vsc = radeon_crtc->vsc;
                wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_high.vtaps = 2;
                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_high.lb_size = lb_size;
                wm_high.dram_channels = dram_channels;
                wm_high.num_heads = num_heads;

                /* watermark for low clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
                        wm_low.yclk =
                                radeon_dpm_get_mclk(rdev, true) * 10;
                        wm_low.sclk =
                                radeon_dpm_get_sclk(rdev, true) * 10;
                } else {
                        wm_low.yclk = rdev->pm.current_mclk * 10;
                        wm_low.sclk = rdev->pm.current_sclk * 10;
                }

                wm_low.disp_clk = mode->clock;
                wm_low.src_width = mode->crtc_hdisplay;
                wm_low.active_time = mode->crtc_hdisplay * pixel_period;
                wm_low.blank_time = line_time - wm_low.active_time;
                wm_low.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_low.interlaced = true;
                wm_low.vsc = radeon_crtc->vsc;
                wm_low.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_low.vtaps = 2;
                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_low.lb_size = lb_size;
                wm_low.dram_channels = dram_channels;
                wm_low.num_heads = num_heads;

                /* set for high clocks */
                latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
                /* set for low clocks */
                latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
                    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
                    !dce6_check_latency_hiding(&wm_high) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                        priority_a_cnt |= PRIORITY_ALWAYS_ON;
                        priority_b_cnt |= PRIORITY_ALWAYS_ON;
                }
                if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
                    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
                    !dce6_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                        priority_a_cnt |= PRIORITY_ALWAYS_ON;
                        priority_b_cnt |= PRIORITY_ALWAYS_ON;
                }

                /* priority mark A: watermark_a(ns) * (clock/1000) * hsc / 1000 / 16 */
                a.full = dfixed_const(1000);
                b.full = dfixed_const(mode->clock);
                b.full = dfixed_div(b, a);
                c.full = dfixed_const(latency_watermark_a);
                c.full = dfixed_mul(c, b);
                c.full = dfixed_mul(c, radeon_crtc->hsc);
                c.full = dfixed_div(c, a);
                a.full = dfixed_const(16);
                c.full = dfixed_div(c, a);
                priority_a_mark = dfixed_trunc(c);
                priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

                /* priority mark B: same formula with the low-clock watermark */
                a.full = dfixed_const(1000);
                b.full = dfixed_const(mode->clock);
                b.full = dfixed_div(b, a);
                c.full = dfixed_const(latency_watermark_b);
                c.full = dfixed_mul(c, b);
                c.full = dfixed_mul(c, radeon_crtc->hsc);
                c.full = dfixed_div(c, a);
                a.full = dfixed_const(16);
                c.full = dfixed_div(c, a);
                priority_b_mark = dfixed_trunc(c);
                priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
        }

        /* select wm A */
        arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
        tmp = arb_control3;
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(1);
        WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* select wm B */
        tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(2);
        WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

        /* write the priority marks */
        WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
        WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

        /* save values for DPM */
        radeon_crtc->line_time = line_time;
        radeon_crtc->wm_high = latency_watermark_a;
        radeon_crtc->wm_low = latency_watermark_b;
}
2251
2252 void dce6_bandwidth_update(struct radeon_device *rdev)
2253 {
2254         struct drm_display_mode *mode0 = NULL;
2255         struct drm_display_mode *mode1 = NULL;
2256         u32 num_heads = 0, lb_size;
2257         int i;
2258
2259         radeon_update_display_priority(rdev);
2260
2261         for (i = 0; i < rdev->num_crtc; i++) {
2262                 if (rdev->mode_info.crtcs[i]->base.enabled)
2263                         num_heads++;
2264         }
2265         for (i = 0; i < rdev->num_crtc; i += 2) {
2266                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2267                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2268                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2269                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2270                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2271                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2272         }
2273 }
2274
2275 /*
2276  * Core functions
2277  */
2278 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2279 {
2280         const u32 num_tile_mode_states = 32;
2281         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2282
2283         switch (rdev->config.si.mem_row_size_in_kb) {
2284         case 1:
2285                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2286                 break;
2287         case 2:
2288         default:
2289                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2290                 break;
2291         case 4:
2292                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2293                 break;
2294         }
2295
2296         if ((rdev->family == CHIP_TAHITI) ||
2297             (rdev->family == CHIP_PITCAIRN)) {
2298                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2299                         switch (reg_offset) {
2300                         case 0:  /* non-AA compressed depth or any compressed stencil */
2301                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2303                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2304                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2305                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2306                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2308                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2309                                 break;
2310                         case 1:  /* 2xAA/4xAA compressed depth only */
2311                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2313                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2314                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2315                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2316                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2319                                 break;
2320                         case 2:  /* 8xAA compressed depth only */
2321                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2323                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2324                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2325                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2326                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2328                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2329                                 break;
2330                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2331                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2333                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2334                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2335                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2336                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2337                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2338                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2339                                 break;
2340                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2341                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2343                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2344                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2345                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2346                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2348                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2349                                 break;
2350                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2351                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2353                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2354                                                  TILE_SPLIT(split_equal_to_row_size) |
2355                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2356                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2357                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2358                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2359                                 break;
2360                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2361                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2364                                                  TILE_SPLIT(split_equal_to_row_size) |
2365                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2366                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2368                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2369                                 break;
2370                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2374                                                  TILE_SPLIT(split_equal_to_row_size) |
2375                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2376                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2379                                 break;
2380                         case 8:  /* 1D and 1D Array Surfaces */
2381                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2382                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2383                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2384                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2385                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2386                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2388                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2389                                 break;
2390                         case 9:  /* Displayable maps. */
2391                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2392                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2393                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2394                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2395                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2396                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2398                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2399                                 break;
2400                         case 10:  /* Display 8bpp. */
2401                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2404                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2405                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2406                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2409                                 break;
2410                         case 11:  /* Display 16bpp. */
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2414                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2416                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2418                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2419                                 break;
2420                         case 12:  /* Display 32bpp. */
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2424                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2425                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2426                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2429                                 break;
2430                         case 13:  /* Thin. */
2431                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2432                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2433                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2434                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2435                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2436                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2438                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2439                                 break;
2440                         case 14:  /* Thin 8 bpp. */
2441                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2442                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2443                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2444                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2445                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2446                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2449                                 break;
2450                         case 15:  /* Thin 16 bpp. */
2451                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2453                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2454                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2455                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2456                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2459                                 break;
2460                         case 16:  /* Thin 32 bpp. */
2461                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2463                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2464                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2466                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2469                                 break;
2470                         case 17:  /* Thin 64 bpp. */
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2473                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2474                                                  TILE_SPLIT(split_equal_to_row_size) |
2475                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2476                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2479                                 break;
2480                         case 21:  /* 8 bpp PRT. */
2481                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2483                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2484                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2485                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2486                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2489                                 break;
2490                         case 22:  /* 16 bpp PRT */
2491                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2493                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2494                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2495                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2496                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2499                                 break;
2500                         case 23:  /* 32 bpp PRT */
2501                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2503                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2504                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2506                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2509                                 break;
2510                         case 24:  /* 64 bpp PRT */
2511                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2513                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2514                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2515                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2516                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2519                                 break;
2520                         case 25:  /* 128 bpp PRT */
2521                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2523                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2524                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2525                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2526                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2529                                 break;
2530                         default:
2531                                 gb_tile_moden = 0;
2532                                 break;
2533                         }
2534                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2535                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2536                 }
2537         } else if ((rdev->family == CHIP_VERDE) ||
2538                    (rdev->family == CHIP_OLAND) ||
2539                    (rdev->family == CHIP_HAINAN)) {
2540                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2541                         switch (reg_offset) {
2542                         case 0:  /* non-AA compressed depth or any compressed stencil */
2543                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2545                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2546                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2547                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2548                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2550                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2551                                 break;
2552                         case 1:  /* 2xAA/4xAA compressed depth only */
2553                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2555                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2556                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2557                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2558                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2561                                 break;
2562                         case 2:  /* 8xAA compressed depth only */
2563                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2566                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2567                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2568                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2570                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2571                                 break;
2572                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2577                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2578                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2581                                 break;
2582                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2583                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2584                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2585                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2586                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2587                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2588                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2591                                 break;
2592                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2593                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2595                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2596                                                  TILE_SPLIT(split_equal_to_row_size) |
2597                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2598                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2600                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2601                                 break;
2602                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2606                                                  TILE_SPLIT(split_equal_to_row_size) |
2607                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2608                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2611                                 break;
2612                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2616                                                  TILE_SPLIT(split_equal_to_row_size) |
2617                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2618                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2621                                 break;
2622                         case 8:  /* 1D and 1D Array Surfaces */
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2624                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2626                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2627                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2628                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2631                                 break;
2632                         case 9:  /* Displayable maps. */
2633                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2634                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2635                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2636                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2637                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2638                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641                                 break;
2642                         case 10:  /* Display 8bpp. */
2643                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2646                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2647                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2648                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2651                                 break;
2652                         case 11:  /* Display 16bpp. */
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2657                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2658                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2660                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2661                                 break;
2662                         case 12:  /* Display 32bpp. */
2663                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2664                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2665                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2666                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2667                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2668                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2670                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2671                                 break;
2672                         case 13:  /* Thin. */
2673                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2675                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2677                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2678                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2680                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2681                                 break;
2682                         case 14:  /* Thin 8 bpp. */
2683                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2685                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2687                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2688                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2690                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2691                                 break;
2692                         case 15:  /* Thin 16 bpp. */
2693                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2695                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2697                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2698                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2700                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2701                                 break;
2702                         case 16:  /* Thin 32 bpp. */
2703                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2705                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2707                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2708                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2711                                 break;
2712                         case 17:  /* Thin 64 bpp. */
2713                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2715                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2716                                                  TILE_SPLIT(split_equal_to_row_size) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2718                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2720                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2721                                 break;
2722                         case 21:  /* 8 bpp PRT. */
2723                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2725                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2726                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2727                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2728                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2729                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2731                                 break;
2732                         case 22:  /* 16 bpp PRT */
2733                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2735                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2736                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2737                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2738                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2741                                 break;
2742                         case 23:  /* 32 bpp PRT */
2743                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2745                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2746                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2748                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2750                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2751                                 break;
2752                         case 24:  /* 64 bpp PRT */
2753                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2755                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2756                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2757                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2758                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2760                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2761                                 break;
2762                         case 25:  /* 128 bpp PRT */
2763                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2765                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2766                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2767                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2768                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2771                                 break;
2772                         default:
2773                                 gb_tile_moden = 0;
2774                                 break;
2775                         }
2776                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2777                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2778                 }
2779         } else
2780                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2781 }
2782
2783 static void si_select_se_sh(struct radeon_device *rdev,
2784                             u32 se_num, u32 sh_num)
2785 {
2786         u32 data = INSTANCE_BROADCAST_WRITES;
2787
2788         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2789                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2790         else if (se_num == 0xffffffff)
2791                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2792         else if (sh_num == 0xffffffff)
2793                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2794         else
2795                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2796         WREG32(GRBM_GFX_INDEX, data);
2797 }
2798
2799 static u32 si_create_bitmask(u32 bit_width)
2800 {
2801         u32 i, mask = 0;
2802
2803         for (i = 0; i < bit_width; i++) {
2804                 mask <<= 1;
2805                 mask |= 1;
2806         }
2807         return mask;
2808 }
2809
2810 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2811 {
2812         u32 data, mask;
2813
2814         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2815         if (data & 1)
2816                 data &= INACTIVE_CUS_MASK;
2817         else
2818                 data = 0;
2819         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2820
2821         data >>= INACTIVE_CUS_SHIFT;
2822
2823         mask = si_create_bitmask(cu_per_sh);
2824
2825         return ~data & mask;
2826 }
2827
2828 static void si_setup_spi(struct radeon_device *rdev,
2829                          u32 se_num, u32 sh_per_se,
2830                          u32 cu_per_sh)
2831 {
2832         int i, j, k;
2833         u32 data, mask, active_cu;
2834
2835         for (i = 0; i < se_num; i++) {
2836                 for (j = 0; j < sh_per_se; j++) {
2837                         si_select_se_sh(rdev, i, j);
2838                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2839                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2840
2841                         mask = 1;
2842                         for (k = 0; k < 16; k++) {
2843                                 mask <<= k;
2844                                 if (active_cu & mask) {
2845                                         data &= ~mask;
2846                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2847                                         break;
2848                                 }
2849                         }
2850                 }
2851         }
2852         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2853 }
2854
2855 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2856                               u32 max_rb_num, u32 se_num,
2857                               u32 sh_per_se)
2858 {
2859         u32 data, mask;
2860
2861         data = RREG32(CC_RB_BACKEND_DISABLE);
2862         if (data & 1)
2863                 data &= BACKEND_DISABLE_MASK;
2864         else
2865                 data = 0;
2866         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2867
2868         data >>= BACKEND_DISABLE_SHIFT;
2869
2870         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2871
2872         return data & mask;
2873 }
2874
/**
 * si_setup_rb - configure the raster config for the enabled render backends
 *
 * Collects the per-SE/SH render-backend disable bits into one packed
 * bitmap, inverts it into a bitmap of usable RBs, and then programs
 * PA_SC_RASTER_CONFIG for each shader engine based on which of each
 * SH's two RBs survived harvesting.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather disable bits from every SE/SH pair; each SH contributes
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits to the packed bitmap. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert into a bitmap of enabled RBs. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program the raster config per SE, consuming two enabled_rbs bits
	 * (one SH's worth) per inner iteration.  The RB_MAP_* choice per
	 * 2-bit pattern mirrors the upstream radeon driver.
	 * NOTE(review): the shift uses the global SH index (i * sh_per_se
	 * + j); verify against current upstream si.c, which reworked this
	 * computation later. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2922
/**
 * si_gpu_init - one-time 3D engine setup for Southern Islands parts
 *
 * Fills rdev->config.si with per-ASIC shader/backend limits, derives
 * the tiling configuration from the memory controller settings,
 * programs the address-config registers, initializes the tiling mode
 * table, render backends and SPI, and applies HW defaults for the 3D
 * engine.  Register write ordering follows the upstream radeon driver
 * and should not be rearranged.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family limits: shader engines, tile pipes, CUs per SH, etc. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* Unknown SI parts are treated as Verde. */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read for parity with upstream but not used
	 * further in this function. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* Derive memory row size (in KB) from the RAM column config,
	 * clamped to 4 KB. */
	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Propagate the address config to every block that does its own
	 * address translation (display, HDP, both DMA engines, UVD). */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* Read-modify-write with no bit changes: keeps the reset value. */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* Zero out all CB performance counter selects. */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* Let the writes above settle before the engine is used. */
	DRM_UDELAY(50);
}
3179
3180 /*
3181  * GPU scratch registers helpers function.
3182  */
3183 static void si_scratch_init(struct radeon_device *rdev)
3184 {
3185         int i;
3186
3187         rdev->scratch.num_reg = 7;
3188         rdev->scratch.reg_base = SCRATCH_REG0;
3189         for (i = 0; i < rdev->scratch.num_reg; i++) {
3190                 rdev->scratch.free[i] = true;
3191                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3192         }
3193 }
3194
/**
 * si_fence_ring_emit - emit a fence on the given CP ring
 *
 * First flushes the GPU read caches over GART via SURFACE_SYNC, then
 * emits an EVENT_WRITE_EOP that writes fence->seq to the fence driver's
 * GPU address and raises an interrupt.  The packet layout must not be
 * reordered.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* sync the full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff); /* fence address low */
	/* address high bits plus: DATA_SEL(1) = write 32-bit seq,
	 * INT_SEL(2) = raise interrupt after the write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3221
3222 /*
3223  * IB stuff
3224  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 *
 * Emits the packets that make the CP fetch and execute @ib.  Const IBs
 * are preceded by a SWITCH_BUFFER packet and use the CONST variant of
 * INDIRECT_BUFFER; normal IBs optionally record the expected read
 * pointer (via a scratch register or the writeback area) and are
 * followed by a GART read-cache flush tagged with the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet
			 * + 8 for the flush emitted below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA + 4 + 8 as above */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* length plus the VM id in bits 31:24 (0 = system/kernel VM) */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3281
3282 /*
3283  * CP.
3284  */
3285 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3286 {
3287         if (enable)
3288                 WREG32(CP_ME_CNTL, 0);
3289         else {
3290                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3291                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3292                 WREG32(SCRATCH_UMSK, 0);
3293                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3294                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3295                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3296         }
3297         DRM_UDELAY(50);
3298 }
3299
3300 static int si_cp_load_microcode(struct radeon_device *rdev)
3301 {
3302         const __be32 *fw_data;
3303         int i;
3304
3305         if (!rdev->me_fw || !rdev->pfp_fw)
3306                 return -EINVAL;
3307
3308         si_cp_enable(rdev, false);
3309
3310         /* PFP */
3311         fw_data = (const __be32 *)rdev->pfp_fw->data;
3312         WREG32(CP_PFP_UCODE_ADDR, 0);
3313         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3314                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3315         WREG32(CP_PFP_UCODE_ADDR, 0);
3316
3317         /* CE */
3318         fw_data = (const __be32 *)rdev->ce_fw->data;
3319         WREG32(CP_CE_UCODE_ADDR, 0);
3320         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3321                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3322         WREG32(CP_CE_UCODE_ADDR, 0);
3323
3324         /* ME */
3325         fw_data = (const __be32 *)rdev->me_fw->data;
3326         WREG32(CP_ME_RAM_WADDR, 0);
3327         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3328                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3329         WREG32(CP_ME_RAM_WADDR, 0);
3330
3331         WREG32(CP_PFP_UCODE_ADDR, 0);
3332         WREG32(CP_CE_UCODE_ADDR, 0);
3333         WREG32(CP_ME_RAM_WADDR, 0);
3334         WREG32(CP_ME_RAM_RADDR, 0);
3335         return 0;
3336 }
3337
3338 static int si_cp_start(struct radeon_device *rdev)
3339 {
3340         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3341         int r, i;
3342
3343         r = radeon_ring_lock(rdev, ring, 7 + 4);
3344         if (r) {
3345                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3346                 return r;
3347         }
3348         /* init the CP */
3349         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3350         radeon_ring_write(ring, 0x1);
3351         radeon_ring_write(ring, 0x0);
3352         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3353         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3354         radeon_ring_write(ring, 0);
3355         radeon_ring_write(ring, 0);
3356
3357         /* init the CE partitions */
3358         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3359         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3360         radeon_ring_write(ring, 0xc000);
3361         radeon_ring_write(ring, 0xe000);
3362         radeon_ring_unlock_commit(rdev, ring);
3363
3364         si_cp_enable(rdev, true);
3365
3366         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3367         if (r) {
3368                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3369                 return r;
3370         }
3371
3372         /* setup clear context state */
3373         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3374         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3375
3376         for (i = 0; i < si_default_size; i++)
3377                 radeon_ring_write(ring, si_default_state[i]);
3378
3379         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3380         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3381
3382         /* set clear context state */
3383         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3384         radeon_ring_write(ring, 0);
3385
3386         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3387         radeon_ring_write(ring, 0x00000316);
3388         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3389         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3390
3391         radeon_ring_unlock_commit(rdev, ring);
3392
3393         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3394                 ring = &rdev->ring[i];
3395                 r = radeon_ring_lock(rdev, ring, 2);
3396
3397                 /* clear the compute context state */
3398                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3399                 radeon_ring_write(ring, 0);
3400
3401                 radeon_ring_unlock_commit(rdev, ring);
3402         }
3403
3404         return 0;
3405 }
3406
3407 static void si_cp_fini(struct radeon_device *rdev)
3408 {
3409         struct radeon_ring *ring;
3410         si_cp_enable(rdev, false);
3411
3412         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3413         radeon_ring_fini(rdev, ring);
3414         radeon_scratch_free(rdev, ring->rptr_save_reg);
3415
3416         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3417         radeon_ring_fini(rdev, ring);
3418         radeon_scratch_free(rdev, ring->rptr_save_reg);
3419
3420         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3421         radeon_ring_fini(rdev, ring);
3422         radeon_scratch_free(rdev, ring->rptr_save_reg);
3423 }
3424
/**
 * si_cp_resume - soft-reset the CP and bring up all three rings
 * @rdev: radeon_device pointer
 *
 * Resets the CP (plus PA/SH/VGT blocks that must be reset with it),
 * programs the ring buffer size, read/write pointers, writeback
 * addresses and base for the gfx ring (RB0) and the two compute rings
 * (RB1/RB2), then starts the rings and ring-tests each one.
 *
 * Returns 0 on success; propagates the gfx ring test failure (the two
 * compute ring tests only clear the per-ring ready flag on failure).
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	DRM_MDELAY(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable rptr updates to memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	DRM_MDELAY(1);
	WREG32(CP_RB0_CNTL, tmp);	/* drops RB_RPTR_WR_ENA again */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	DRM_MDELAY(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	DRM_MDELAY(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal for all three rings */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3560
/**
 * si_gpu_check_soft_reset - determine which GPU blocks are hung
 * @rdev: radeon_device pointer
 *
 * Samples the GRBM/SRBM/DMA/VM status registers and builds a
 * RADEON_RESET_* bitmask of the engines that report busy or pending
 * requests. An MC-busy indication is deliberately cleared from the
 * result (busy MC is usually not hung, just active).
 *
 * Returns the reset mask (0 when everything looks idle).
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3641
/**
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bitmask from si_gpu_check_soft_reset()
 *
 * Halts the CP and any DMA engines being reset, stops the memory
 * controller, translates the reset mask into GRBM/SRBM soft-reset
 * bits, pulses those bits (set, delay, clear, with read-backs to post
 * the writes), and finally restores the MC state.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	DRM_UDELAY(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the RADEON_RESET_* mask onto GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* pulse the GRBM reset bits: set, settle, clear */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		DRM_UDELAY(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* pulse the SRBM reset bits: set, settle, clear */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		DRM_UDELAY(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	DRM_UDELAY(50);

	evergreen_mc_resume(rdev, &save);
	DRM_UDELAY(50);

	evergreen_print_gpu_status_regs(rdev);
}
3766
3767 int si_asic_reset(struct radeon_device *rdev)
3768 {
3769         u32 reset_mask;
3770
3771         reset_mask = si_gpu_check_soft_reset(rdev);
3772
3773         if (reset_mask)
3774                 r600_set_bios_scratch_engine_hung(rdev, true);
3775
3776         si_gpu_soft_reset(rdev, reset_mask);
3777
3778         reset_mask = si_gpu_check_soft_reset(rdev);
3779
3780         if (!reset_mask)
3781                 r600_set_bios_scratch_engine_hung(rdev, false);
3782
3783         return 0;
3784 }
3785
3786 /**
3787  * si_gfx_is_lockup - Check if the GFX engine is locked up
3788  *
3789  * @rdev: radeon_device pointer
3790  * @ring: radeon_ring structure holding ring information
3791  *
3792  * Check if the GFX engine is locked up.
3793  * Returns true if the engine appears to be locked up, false if not.
3794  */
3795 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3796 {
3797         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3798
3799         if (!(reset_mask & (RADEON_RESET_GFX |
3800                             RADEON_RESET_COMPUTE |
3801                             RADEON_RESET_CP))) {
3802                 radeon_ring_lockup_update(ring);
3803                 return false;
3804         }
3805         /* force CP activities */
3806         radeon_ring_force_activity(rdev, ring);
3807         return radeon_ring_test_lockup(rdev, ring);
3808 }
3809
3810 /**
3811  * si_dma_is_lockup - Check if the DMA engine is locked up
3812  *
3813  * @rdev: radeon_device pointer
3814  * @ring: radeon_ring structure holding ring information
3815  *
3816  * Check if the async DMA engine is locked up.
3817  * Returns true if the engine appears to be locked up, false if not.
3818  */
3819 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3820 {
3821         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3822         u32 mask;
3823
3824         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3825                 mask = RADEON_RESET_DMA;
3826         else
3827                 mask = RADEON_RESET_DMA1;
3828
3829         if (!(reset_mask & mask)) {
3830                 radeon_ring_lockup_update(ring);
3831                 return false;
3832         }
3833         /* force ring activities */
3834         radeon_ring_force_activity(rdev, ring);
3835         return radeon_ring_test_lockup(rdev, ring);
3836 }
3837
/* MC */
/**
 * si_mc_program - program the memory controller's aperture layout
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC, programs the system
 * aperture / FB location / HDP non-surface range, disables the AGP
 * aperture, and resumes the MC. VGA access is locked out (and the VGA
 * renderer disabled) on ASICs that have display hardware.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3889
/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 * @rdev: radeon_device pointer
 * @mc: memory controller layout to fill in
 *
 * Caps usable VRAM at 0xFFC0000000 so at least 1024M of address space
 * remains for the GTT, then picks VRAM and GTT base locations.
 *
 * NOTE(review): the VRAM placement call passes &rdev->mc while the GTT
 * placement uses the @mc argument; this is only equivalent when the
 * caller passes &rdev->mc — confirm before adding other callers.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3903
3904 static int si_mc_init(struct radeon_device *rdev)
3905 {
3906         u32 tmp;
3907         int chansize, numchan;
3908
3909         /* Get VRAM informations */
3910         rdev->mc.vram_is_ddr = true;
3911         tmp = RREG32(MC_ARB_RAMCFG);
3912         if (tmp & CHANSIZE_OVERRIDE) {
3913                 chansize = 16;
3914         } else if (tmp & CHANSIZE_MASK) {
3915                 chansize = 64;
3916         } else {
3917                 chansize = 32;
3918         }
3919         tmp = RREG32(MC_SHARED_CHMAP);
3920         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3921         case 0:
3922         default:
3923                 numchan = 1;
3924                 break;
3925         case 1:
3926                 numchan = 2;
3927                 break;
3928         case 2:
3929                 numchan = 4;
3930                 break;
3931         case 3:
3932                 numchan = 8;
3933                 break;
3934         case 4:
3935                 numchan = 3;
3936                 break;
3937         case 5:
3938                 numchan = 6;
3939                 break;
3940         case 6:
3941                 numchan = 10;
3942                 break;
3943         case 7:
3944                 numchan = 12;
3945                 break;
3946         case 8:
3947                 numchan = 16;
3948                 break;
3949         }
3950         rdev->mc.vram_width = numchan * chansize;
3951         /* Could aper size report 0 ? */
3952         rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
3953         rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
3954         /* size in MB on si */
3955         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3956         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3957         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3958         si_vram_gtt_location(rdev, &rdev->mc);
3959         radeon_update_bandwidth_info(rdev);
3960
3961         return 0;
3962 }
3963
3964 /*
3965  * GART
3966  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and the VM TLB
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP read cache, then invalidates VM context 0's TLB so
 * the GPU picks up updated GART page table entries.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3975
/**
 * si_pcie_gart_enable - bring up the PCIE GART (VM context 0)
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, configures the L1 TLB and L2
 * cache, programs VM context 0 to cover the GTT range, points VM
 * contexts 1-15 at the same page table for now (real per-VM tables
 * are assigned later by the VM code), enables their protection-fault
 * handling, and flushes the TLB.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 fall back to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4060
/**
 * si_pcie_gart_disable - shut down the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, turns off the L1 TLB and L2 cache, and
 * unpins the GART page table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4079
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees its VRAM page table, and releases the
 * generic GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4086
4087 /* vm parser */
4088 static bool si_vm_reg_valid(u32 reg)
4089 {
4090         /* context regs are fine */
4091         if (reg >= 0x28000)
4092                 return true;
4093
4094         /* check config regs */
4095         switch (reg) {
4096         case GRBM_GFX_INDEX:
4097         case CP_STRMOUT_CNTL:
4098         case VGT_VTX_VECT_EJECT_REG:
4099         case VGT_CACHE_INVALIDATION:
4100         case VGT_ESGS_RING_SIZE:
4101         case VGT_GSVS_RING_SIZE:
4102         case VGT_GS_VERTEX_REUSE:
4103         case VGT_PRIMITIVE_TYPE:
4104         case VGT_INDEX_TYPE:
4105         case VGT_NUM_INDICES:
4106         case VGT_NUM_INSTANCES:
4107         case VGT_TF_RING_SIZE:
4108         case VGT_HS_OFFCHIP_PARAM:
4109         case VGT_TF_MEMORY_BASE:
4110         case PA_CL_ENHANCE:
4111         case PA_SU_LINE_STIPPLE_VALUE:
4112         case PA_SC_LINE_STIPPLE_STATE:
4113         case PA_SC_ENHANCE:
4114         case SQC_CACHES:
4115         case SPI_STATIC_THREAD_MGMT_1:
4116         case SPI_STATIC_THREAD_MGMT_2:
4117         case SPI_STATIC_THREAD_MGMT_3:
4118         case SPI_PS_MAX_WAVE_ID:
4119         case SPI_CONFIG_CNTL:
4120         case SPI_CONFIG_CNTL_1:
4121         case TA_CNTL_AUX:
4122                 return true;
4123         default:
4124                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4125                 return false;
4126         }
4127 }
4128
4129 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4130                                   u32 *ib, struct radeon_cs_packet *pkt)
4131 {
4132         switch (pkt->opcode) {
4133         case PACKET3_NOP:
4134         case PACKET3_SET_BASE:
4135         case PACKET3_SET_CE_DE_COUNTERS:
4136         case PACKET3_LOAD_CONST_RAM:
4137         case PACKET3_WRITE_CONST_RAM:
4138         case PACKET3_WRITE_CONST_RAM_OFFSET:
4139         case PACKET3_DUMP_CONST_RAM:
4140         case PACKET3_INCREMENT_CE_COUNTER:
4141         case PACKET3_WAIT_ON_DE_COUNTER:
4142         case PACKET3_CE_WRITE:
4143                 break;
4144         default:
4145                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4146                 return -EINVAL;
4147         }
4148         return 0;
4149 }
4150
/*
 * Validate a packet3 from a VM indirect buffer destined for the GFX ring.
 * Packets that can write registers have their destination registers checked
 * against si_vm_reg_valid().  Returns 0 if the packet is allowed, -EINVAL
 * on a disallowed opcode or register.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
                                   u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, end_reg, reg, i;
        u32 command, info;

        switch (pkt->opcode) {
        /* these opcodes are permitted without further operand checks */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_INDEX_BUFFER_SIZE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_DRAW_INDIRECT:
        case PACKET3_DRAW_INDEX_INDIRECT:
        case PACKET3_INDEX_BASE:
        case PACKET3_DRAW_INDEX_2:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_INDEX_TYPE:
        case PACKET3_DRAW_INDIRECT_MULTI:
        case PACKET3_DRAW_INDEX_AUTO:
        case PACKET3_DRAW_INDEX_IMMD:
        case PACKET3_NUM_INSTANCES:
        case PACKET3_DRAW_INDEX_MULTI_AUTO:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_DRAW_INDEX_OFFSET_2:
        case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
        case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
        case PACKET3_MPEG_INDEX:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* only check when the destination select indicates a register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;  /* dword offset -> byte offset */
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                if ((idx_value & 0xf00) == 0) {
                        /* destination is register space */
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* single-register write mode: every data
                                 * dword targets the same register */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* NOTE(review): pkt->count - 2 underflows for
                                 * count < 2 — assumed bounded by the caller;
                                 * confirm against the IB parser */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set: the write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set: destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_SET_CONFIG_REG:
                /* validate the whole register range against the config window
                 * and then each individual register */
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
                if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
                    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
                    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
                        DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
                        return -EINVAL;
                }
                for (i = 0; i < pkt->count; i++) {
                        reg = start_reg + (4 * i);
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_CP_DMA:
                command = ib[idx + 4];
                info = ib[idx + 1];
                if (command & PACKET3_CP_DMA_CMD_SAS) {
                        /* src address space is register */
                        if (((info & 0x60000000) >> 29) == 0) {
                                start_reg = idx_value << 2;
                                if (command & PACKET3_CP_DMA_CMD_SAIC) {
                                        /* SAIC: source address does not
                                         * increment, one register only */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad SRC register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        /* low 21 bits of command = byte count */
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad SRC register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                if (command & PACKET3_CP_DMA_CMD_DAS) {
                        /* dst address space is register */
                        if (((info & 0x00300000) >> 20) == 0) {
                                start_reg = ib[idx + 2];
                                if (command & PACKET3_CP_DMA_CMD_DAIC) {
                                        /* DAIC: destination address does not
                                         * increment, one register only */
                                        reg = start_reg;
                                        if (!si_vm_reg_valid(reg)) {
                                                DRM_ERROR("CP DMA Bad DST register\n");
                                                return -EINVAL;
                                        }
                                } else {
                                        for (i = 0; i < (command & 0x1fffff); i++) {
                                                reg = start_reg + (4 * i);
                                                if (!si_vm_reg_valid(reg)) {
                                                        DRM_ERROR("CP DMA Bad DST register\n");
                                                        return -EINVAL;
                                                }
                                        }
                                }
                        }
                }
                break;
        default:
                DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4309
/*
 * Validate a packet3 from a VM indirect buffer destined for a compute ring.
 * Same idea as si_vm_packet3_gfx_check() but with the smaller set of opcodes
 * permitted on compute, and no SET_CONFIG_REG/CP_DMA handling.
 * Returns 0 if allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
                                       u32 *ib, struct radeon_cs_packet *pkt)
{
        u32 idx = pkt->idx + 1;         /* first dword after the packet header */
        u32 idx_value = ib[idx];
        u32 start_reg, reg, i;

        switch (pkt->opcode) {
        /* these opcodes are permitted without further operand checks */
        case PACKET3_NOP:
        case PACKET3_SET_BASE:
        case PACKET3_CLEAR_STATE:
        case PACKET3_DISPATCH_DIRECT:
        case PACKET3_DISPATCH_INDIRECT:
        case PACKET3_ALLOC_GDS:
        case PACKET3_WRITE_GDS_RAM:
        case PACKET3_ATOMIC_GDS:
        case PACKET3_ATOMIC:
        case PACKET3_OCCLUSION_QUERY:
        case PACKET3_SET_PREDICATION:
        case PACKET3_COND_EXEC:
        case PACKET3_PRED_EXEC:
        case PACKET3_CONTEXT_CONTROL:
        case PACKET3_STRMOUT_BUFFER_UPDATE:
        case PACKET3_WAIT_REG_MEM:
        case PACKET3_MEM_WRITE:
        case PACKET3_PFP_SYNC_ME:
        case PACKET3_SURFACE_SYNC:
        case PACKET3_EVENT_WRITE:
        case PACKET3_EVENT_WRITE_EOP:
        case PACKET3_EVENT_WRITE_EOS:
        case PACKET3_SET_CONTEXT_REG:
        case PACKET3_SET_CONTEXT_REG_INDIRECT:
        case PACKET3_SET_SH_REG:
        case PACKET3_SET_SH_REG_OFFSET:
        case PACKET3_INCREMENT_DE_COUNTER:
        case PACKET3_WAIT_ON_CE_COUNTER:
        case PACKET3_WAIT_ON_AVAIL_BUFFER:
        case PACKET3_ME_WRITE:
                break;
        case PACKET3_COPY_DATA:
                /* only check when the destination select indicates a register */
                if ((idx_value & 0xf00) == 0) {
                        reg = ib[idx + 3] * 4;  /* dword offset -> byte offset */
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_WRITE_DATA:
                if ((idx_value & 0xf00) == 0) {
                        /* destination is register space */
                        start_reg = ib[idx + 1] * 4;
                        if (idx_value & 0x10000) {
                                /* single-register write mode */
                                if (!si_vm_reg_valid(start_reg))
                                        return -EINVAL;
                        } else {
                                /* NOTE(review): pkt->count - 2 underflows for
                                 * count < 2 — assumed bounded by the caller */
                                for (i = 0; i < (pkt->count - 2); i++) {
                                        reg = start_reg + (4 * i);
                                        if (!si_vm_reg_valid(reg))
                                                return -EINVAL;
                                }
                        }
                }
                break;
        case PACKET3_COND_WRITE:
                /* bit 8 set: the write target is a register */
                if (idx_value & 0x100) {
                        reg = ib[idx + 5] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        case PACKET3_COPY_DW:
                /* bit 1 set: destination is a register */
                if (idx_value & 0x2) {
                        reg = ib[idx + 3] * 4;
                        if (!si_vm_reg_valid(reg))
                                return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}
4391
/**
 * si_ib_parse - validate an indirect buffer for a VM submission (SI)
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks the IB packet by packet and dispatches each packet3 to the
 * per-ring checker (CE for const IBs, GFX or compute otherwise).
 * Packet0 register writes and unknown packet types are rejected.
 * Returns 0 if the whole IB is valid, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        int ret = 0;
        u32 idx = 0;            /* current dword offset into the IB */
        struct radeon_cs_packet pkt;

        do {
                pkt.idx = idx;
                pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
                pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
                pkt.one_reg_wr = 0;
                switch (pkt.type) {
                case RADEON_PACKET_TYPE0:
                        /* direct register writes are not allowed in VM IBs */
                        dev_err(rdev->dev, "Packet0 not allowed!\n");
                        ret = -EINVAL;
                        break;
                case RADEON_PACKET_TYPE2:
                        /* type2 is a one-dword filler packet, just skip it */
                        idx += 1;
                        break;
                case RADEON_PACKET_TYPE3:
                        pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
                        /* const IBs run on the constant engine; everything
                         * else is checked according to the target ring */
                        if (ib->is_const_ib)
                                ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
                        else {
                                switch (ib->ring) {
                                case RADEON_RING_TYPE_GFX_INDEX:
                                        ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
                                        break;
                                case CAYMAN_RING_TYPE_CP1_INDEX:
                                case CAYMAN_RING_TYPE_CP2_INDEX:
                                        ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
                                        break;
                                default:
                                        dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
                                        ret = -EINVAL;
                                        break;
                                }
                        }
                        /* skip the header dword plus (count + 1) payload dwords */
                        idx += pkt.count + 2;
                        break;
                default:
                        dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
                        ret = -EINVAL;
                        break;
                }
                if (ret)
                        break;
        } while (idx < ib->length_dw);

        return ret;
}
4443
4444 /*
4445  * vm
4446  */
4447 int si_vm_init(struct radeon_device *rdev)
4448 {
4449         /* number of VMs */
4450         rdev->vm_manager.nvm = 16;
4451         /* base offset of vram pages */
4452         rdev->vm_manager.vram_base_offset = 0;
4453
4454         return 0;
4455 }
4456
/* si_vm_fini - VM manager teardown; intentionally a no-op on SI,
 * si_vm_init() allocates nothing that would need to be released. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4460
/**
 * si_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 *
 * Print human readable fault information (SI).
 * Decodes the faulting memory client id into a hardware block name;
 * the id-to-block mapping differs between Tahiti and the other SI parts.
 */
static void si_vm_decode_fault(struct radeon_device *rdev,
                               u32 status, u32 addr)
{
        u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
        u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
        u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
        char *block;

        if (rdev->family == CHIP_TAHITI) {
                /* Tahiti memory client id -> block name */
                switch (mc_id) {
                case 160:
                case 144:
                case 96:
                case 80:
                case 224:
                case 208:
                case 32:
                case 16:
                        block = "CB";
                        break;
                case 161:
                case 145:
                case 97:
                case 81:
                case 225:
                case 209:
                case 33:
                case 17:
                        block = "CB_FMASK";
                        break;
                case 162:
                case 146:
                case 98:
                case 82:
                case 226:
                case 210:
                case 34:
                case 18:
                        block = "CB_CMASK";
                        break;
                case 163:
                case 147:
                case 99:
                case 83:
                case 227:
                case 211:
                case 35:
                case 19:
                        block = "CB_IMMED";
                        break;
                case 164:
                case 148:
                case 100:
                case 84:
                case 228:
                case 212:
                case 36:
                case 20:
                        block = "DB";
                        break;
                case 165:
                case 149:
                case 101:
                case 85:
                case 229:
                case 213:
                case 37:
                case 21:
                        block = "DB_HTILE";
                        break;
                case 167:
                case 151:
                case 103:
                case 87:
                case 231:
                case 215:
                case 39:
                case 23:
                        block = "DB_STEN";
                        break;
                case 72:
                case 68:
                case 64:
                case 8:
                case 4:
                case 0:
                case 136:
                case 132:
                case 128:
                case 200:
                case 196:
                case 192:
                        block = "TC";
                        break;
                case 112:
                case 48:
                        block = "CP";
                        break;
                case 49:
                case 177:
                case 50:
                case 178:
                        block = "SH";
                        break;
                case 53:
                case 190:
                        block = "VGT";
                        break;
                case 117:
                        block = "IH";
                        break;
                case 51:
                case 115:
                        block = "RLC";
                        break;
                case 119:
                case 183:
                        block = "DMA0";
                        break;
                case 61:
                        block = "DMA1";
                        break;
                case 248:
                case 120:
                        block = "HDP";
                        break;
                default:
                        block = "unknown";
                        break;
                }
        } else {
                /* Pitcairn/Verde/Oland/Hainan memory client id -> block name */
                switch (mc_id) {
                case 32:
                case 16:
                case 96:
                case 80:
                case 160:
                case 144:
                case 224:
                case 208:
                        block = "CB";
                        break;
                case 33:
                case 17:
                case 97:
                case 81:
                case 161:
                case 145:
                case 225:
                case 209:
                        block = "CB_FMASK";
                        break;
                case 34:
                case 18:
                case 98:
                case 82:
                case 162:
                case 146:
                case 226:
                case 210:
                        block = "CB_CMASK";
                        break;
                case 35:
                case 19:
                case 99:
                case 83:
                case 163:
                case 147:
                case 227:
                case 211:
                        block = "CB_IMMED";
                        break;
                case 36:
                case 20:
                case 100:
                case 84:
                case 164:
                case 148:
                case 228:
                case 212:
                        block = "DB";
                        break;
                case 37:
                case 21:
                case 101:
                case 85:
                case 165:
                case 149:
                case 229:
                case 213:
                        block = "DB_HTILE";
                        break;
                case 39:
                case 23:
                case 103:
                case 87:
                case 167:
                case 151:
                case 231:
                case 215:
                        block = "DB_STEN";
                        break;
                case 72:
                case 68:
                case 8:
                case 4:
                case 136:
                case 132:
                case 200:
                case 196:
                        block = "TC";
                        break;
                case 112:
                case 48:
                        block = "CP";
                        break;
                case 49:
                case 177:
                case 50:
                case 178:
                        block = "SH";
                        break;
                case 53:
                        block = "VGT";
                        break;
                case 117:
                        block = "IH";
                        break;
                case 51:
                case 115:
                        block = "RLC";
                        break;
                case 119:
                case 183:
                        block = "DMA0";
                        break;
                case 61:
                        block = "DMA1";
                        break;
                case 248:
                case 120:
                        block = "HDP";
                        break;
                default:
                        block = "unknown";
                        break;
                }
        }

        printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
               protections, vmid, addr,
               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
               block, mc_id);
}
4724
/**
 * si_vm_set_page - update the page tables using the CP
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).
 * Uses the GFX ring (WRITE_DATA packets) when the page-table ring is the
 * GFX ring, otherwise builds DMA packets.  Each entry is a 64-bit PTE
 * built from the mapped address plus the hw flags.
 */
void si_vm_set_page(struct radeon_device *rdev,
                    struct radeon_ib *ib,
                    uint64_t pe,
                    uint64_t addr, unsigned count,
                    uint32_t incr, uint32_t flags)
{
        uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
        uint64_t value;
        unsigned ndw;           /* dwords consumed by the current packet */

        if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
                while (count) {
                        /* 2 header dwords + 2 dwords per 64-bit entry,
                         * capped per packet */
                        ndw = 2 + count * 2;
                        if (ndw > 0x3FFE)
                                ndw = 0x3FFE;

                        ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
                        ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
                                        WRITE_DATA_DST_SEL(1));
                        ib->ptr[ib->length_dw++] = pe;
                        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
                        for (; ndw > 2; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        /* system page: translate through the GART */
                                        value = radeon_vm_map_gart(rdev, addr);
                                        value &= 0xFFFFFFFFFFFFF000ULL;
                                } else if (flags & RADEON_VM_PAGE_VALID) {
                                        value = addr;
                                } else {
                                        value = 0;      /* invalid entry */
                                }
                                addr += incr;
                                value |= r600_flags;
                                ib->ptr[ib->length_dw++] = value;
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
        } else {
                /* DMA */
                if (flags & RADEON_VM_PAGE_SYSTEM) {
                        while (count) {
                                ndw = count * 2;
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                /* for non-physically contiguous pages (system) */
                                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
                                ib->ptr[ib->length_dw++] = pe;
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                                        if (flags & RADEON_VM_PAGE_SYSTEM) {
                                                value = radeon_vm_map_gart(rdev, addr);
                                                value &= 0xFFFFFFFFFFFFF000ULL;
                                        } else if (flags & RADEON_VM_PAGE_VALID) {
                                                value = addr;
                                        } else {
                                                value = 0;
                                        }
                                        addr += incr;
                                        value |= r600_flags;
                                        ib->ptr[ib->length_dw++] = value;
                                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                }
                        }
                } else {
                        while (count) {
                                ndw = count * 2;
                                if (ndw > 0xFFFFE)
                                        ndw = 0xFFFFE;

                                if (flags & RADEON_VM_PAGE_VALID)
                                        value = addr;
                                else
                                        value = 0;
                                /* for physically contiguous pages (vram): one
                                 * PTE_PDE packet writes ndw/2 incrementing
                                 * entries in hardware */
                                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                ib->ptr[ib->length_dw++] = r600_flags; /* mask */
                                ib->ptr[ib->length_dw++] = 0;
                                ib->ptr[ib->length_dw++] = value; /* value */
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                ib->ptr[ib->length_dw++] = incr; /* increment size */
                                ib->ptr[ib->length_dw++] = 0;
                                pe += ndw * 4;
                                addr += (ndw / 2) * incr;
                                count -= ndw / 2;
                        }
                }
                /* pad the DMA IB to a multiple of 8 dwords with NOPs */
                while (ib->length_dw & 0x7)
                        ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
        }
}
4830
/**
 * si_vm_flush - flush the TLB for a VM via the gfx/compute ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: VM to flush; NULL is a no-op
 *
 * Emits WRITE_DATA packets that update the VM's page directory base,
 * flush the HDP cache and request a TLB invalidate for this VM id,
 * followed by a PFP/ME sync.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* write new base address */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));

        /* contexts 0-7 and 8-15 live in two separate register banks */
        if (vm->id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0x1);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm->id);

        /* sync PFP to ME, otherwise we might get invalid PFP reads */
        radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
        radeon_ring_write(ring, 0x0);
}
4873
/**
 * si_dma_vm_flush - flush the TLB for a VM via the DMA ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit the flush on
 * @vm: VM to flush; NULL is a no-op
 *
 * Same sequence as si_vm_flush() but using SRBM_WRITE DMA packets:
 * update the page directory base, flush the HDP cache and request a
 * TLB invalidate for this VM id.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
        /* contexts 0-7 and 8-15 live in two separate register banks */
        if (vm->id < 8) {
                radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
        } else {
                radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
        }
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm->id);
}
4899
4900 /*
4901  *  Power and clock gating
4902  */
4903 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4904 {
4905         int i;
4906
4907         for (i = 0; i < rdev->usec_timeout; i++) {
4908                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4909                         break;
4910                 DRM_UDELAY(1);
4911         }
4912
4913         for (i = 0; i < rdev->usec_timeout; i++) {
4914                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4915                         break;
4916                 DRM_UDELAY(1);
4917         }
4918 }
4919
4920 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4921                                          bool enable)
4922 {
4923         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4924         u32 mask;
4925         int i;
4926
4927         if (enable)
4928                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4929         else
4930                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4931         WREG32(CP_INT_CNTL_RING0, tmp);
4932
4933         if (!enable) {
4934                 /* read a gfx register */
4935                 tmp = RREG32(DB_DEPTH_INFO);
4936
4937                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4938                 for (i = 0; i < rdev->usec_timeout; i++) {
4939                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4940                                 break;
4941                         DRM_UDELAY(1);
4942                 }
4943         }
4944 }
4945
4946 static void si_set_uvd_dcm(struct radeon_device *rdev,
4947                            bool sw_mode)
4948 {
4949         u32 tmp, tmp2;
4950
4951         tmp = RREG32(UVD_CGC_CTRL);
4952         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4953         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4954
4955         if (sw_mode) {
4956                 tmp &= ~0x7ffff800;
4957                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4958         } else {
4959                 tmp |= 0x7ffff800;
4960                 tmp2 = 0;
4961         }
4962
4963         WREG32(UVD_CGC_CTRL, tmp);
4964         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4965 }
4966
4967 static void si_init_uvd_internal_cg(struct radeon_device *rdev)
4968 {
4969         bool hw_mode = true;
4970
4971         if (hw_mode) {
4972                 si_set_uvd_dcm(rdev, false);
4973         } else {
4974                 u32 tmp = RREG32(UVD_CGC_CTRL);
4975                 tmp &= ~DCM;
4976                 WREG32(UVD_CGC_CTRL, tmp);
4977         }
4978 }
4979
4980 static u32 si_halt_rlc(struct radeon_device *rdev)
4981 {
4982         u32 data, orig;
4983
4984         orig = data = RREG32(RLC_CNTL);
4985
4986         if (data & RLC_ENABLE) {
4987                 data &= ~RLC_ENABLE;
4988                 WREG32(RLC_CNTL, data);
4989
4990                 si_wait_for_rlc_serdes(rdev);
4991         }
4992
4993         return orig;
4994 }
4995
4996 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4997 {
4998         u32 tmp;
4999
5000         tmp = RREG32(RLC_CNTL);
5001         if (tmp != rlc)
5002                 WREG32(RLC_CNTL, rlc);
5003 }
5004
5005 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5006 {
5007         u32 data, orig;
5008
5009         orig = data = RREG32(DMA_PG);
5010         if (enable)
5011                 data |= PG_CNTL_ENABLE;
5012         else
5013                 data &= ~PG_CNTL_ENABLE;
5014         if (orig != data)
5015                 WREG32(DMA_PG, data);
5016 }
5017
5018 static void si_init_dma_pg(struct radeon_device *rdev)
5019 {
5020         u32 tmp;
5021
5022         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5023         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5024
5025         for (tmp = 0; tmp < 5; tmp++)
5026                 WREG32(DMA_PGFSM_WRITE, 0);
5027 }
5028
5029 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5030                                bool enable)
5031 {
5032         u32 tmp;
5033
5034         if (enable) {
5035                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5036                 WREG32(RLC_TTOP_D, tmp);
5037
5038                 tmp = RREG32(RLC_PG_CNTL);
5039                 tmp |= GFX_PG_ENABLE;
5040                 WREG32(RLC_PG_CNTL, tmp);
5041
5042                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5043                 tmp |= AUTO_PG_EN;
5044                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5045         } else {
5046                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5047                 tmp &= ~AUTO_PG_EN;
5048                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5049
5050                 tmp = RREG32(DB_RENDER_CONTROL);
5051         }
5052 }
5053
5054 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5055 {
5056         u32 tmp;
5057
5058         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5059
5060         tmp = RREG32(RLC_PG_CNTL);
5061         tmp |= GFX_PG_SRC;
5062         WREG32(RLC_PG_CNTL, tmp);
5063
5064         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5065
5066         tmp = RREG32(RLC_AUTO_PG_CTRL);
5067
5068         tmp &= ~GRBM_REG_SGIT_MASK;
5069         tmp |= GRBM_REG_SGIT(0x700);
5070         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5071         WREG32(RLC_AUTO_PG_CTRL, tmp);
5072 }
5073
5074 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5075 {
5076         u32 mask = 0, tmp, tmp1;
5077         int i;
5078
5079         si_select_se_sh(rdev, se, sh);
5080         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5081         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5082         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5083
5084         tmp &= 0xffff0000;
5085
5086         tmp |= tmp1;
5087         tmp >>= 16;
5088
5089         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5090                 mask <<= 1;
5091                 mask |= 1;
5092         }
5093
5094         return (~tmp) & mask;
5095 }
5096
5097 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5098 {
5099         u32 i, j, k, active_cu_number = 0;
5100         u32 mask, counter, cu_bitmap;
5101         u32 tmp = 0;
5102
5103         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5104                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5105                         mask = 1;
5106                         cu_bitmap = 0;
5107                         counter  = 0;
5108                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5109                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5110                                         if (counter < 2)
5111                                                 cu_bitmap |= mask;
5112                                         counter++;
5113                                 }
5114                                 mask <<= 1;
5115                         }
5116
5117                         active_cu_number += counter;
5118                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5119                 }
5120         }
5121
5122         WREG32(RLC_PG_AO_CU_MASK, tmp);
5123
5124         tmp = RREG32(RLC_MAX_PG_CU);
5125         tmp &= ~MAX_PU_CU_MASK;
5126         tmp |= MAX_PU_CU(active_cu_number);
5127         WREG32(RLC_MAX_PG_CU, tmp);
5128 }
5129
/*
 * si_enable_cgcg - enable/disable coarse-grained clock gating (CGCG/CGLS)
 * @rdev: radeon device
 * @enable: true to enable gating
 *
 * Enabling halts the RLC, programs the serdes write masks/control,
 * waits for the serdes masters to go idle, restores the RLC and then
 * sets CGCG_EN | CGLS_EN.  Disabling clears those bits after a series
 * of dummy reads.  RLC_CGCG_CGLS_CTRL is written only on change.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	/* gate the GUI-idle interrupt together with CGCG */
	si_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* stop the RLC while the serdes links are reprogrammed */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the RLC_CNTL value saved by si_halt_rlc() */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* four dummy reads of CB_CGTT_SCLK_CTRL; purpose not
		 * documented in this code — presumably needed before
		 * ungating (TODO confirm against hw docs) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5167
/*
 * si_enable_mgcg - enable/disable medium-grained clock gating
 * @rdev: radeon device
 * @enable: true to enable gating
 *
 * Programs CGTS_SM_CTRL_REG, CP memory light sleep, the MGCG override
 * register and the RLC serdes, halting/restoring the RLC around the
 * serdes writes.  Each register is written only when its value changes.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		/* magic value from the original bring-up code */
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* enable CP memory light sleep */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear the low override bits so MGCG can take effect */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		/* restore the saved RLC_CNTL */
		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable CP memory light sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5221
5222 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5223                                bool enable)
5224 {
5225         u32 orig, data, tmp;
5226
5227         if (enable) {
5228                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5229                 tmp |= 0x3fff;
5230                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5231
5232                 orig = data = RREG32(UVD_CGC_CTRL);
5233                 data |= DCM;
5234                 if (orig != data)
5235                         WREG32(UVD_CGC_CTRL, data);
5236
5237                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5238                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5239         } else {
5240                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5241                 tmp &= ~0x3fff;
5242                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5243
5244                 orig = data = RREG32(UVD_CGC_CTRL);
5245                 data &= ~DCM;
5246                 if (orig != data)
5247                         WREG32(UVD_CGC_CTRL, data);
5248
5249                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5250                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5251         }
5252 }
5253
/* Memory-controller clock-gating registers that all carry the
 * MC_LS_ENABLE bit; iterated by si_enable_mc_ls() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5266
5267 static void si_enable_mc_ls(struct radeon_device *rdev,
5268                             bool enable)
5269 {
5270         int i;
5271         u32 orig, data;
5272
5273         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5274                 orig = data = RREG32(mc_cg_registers[i]);
5275                 if (enable)
5276                         data |= MC_LS_ENABLE;
5277                 else
5278                         data &= ~MC_LS_ENABLE;
5279                 if (data != orig)
5280                         WREG32(mc_cg_registers[i], data);
5281         }
5282 }
5283
5284
/* Bring up clock gating: MGCG on, CGCG off, MC light sleep disabled on
 * Tahiti, and UVD gating when the ASIC has a UVD block. */
static void si_init_cg(struct radeon_device *rdev)
{
	si_enable_mgcg(rdev, true);
	si_enable_cgcg(rdev, false);
	/* disable MC LS on Tahiti */
	if (rdev->family == CHIP_TAHITI)
		si_enable_mc_ls(rdev, false);
	if (rdev->has_uvd) {
		si_enable_uvd_mgcg(rdev, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5297
/* Tear down clock gating in the reverse order of si_init_cg(). */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd)
		si_enable_uvd_mgcg(rdev, false);
	si_enable_cgcg(rdev, false);
	si_enable_mgcg(rdev, false);
}
5305
/*
 * Initialize power gating.  The Cape Verde PG path is compiled out
 * (#if 0), so has_pg is always false here and only the RLC buffer base
 * registers are programmed; si_fini_pg() below still takes the Verde
 * branch, an asymmetry inherited from the original code.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;
#if 0
	/* only cape verde supports PG */
	if (rdev->family == CHIP_VERDE)
		has_pg = true;
#endif
	if (has_pg) {
		si_init_ao_cu_mask(rdev);
		si_init_dma_pg(rdev);
		si_enable_dma_pg(rdev, true);
		si_init_gfx_cgpg(rdev);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5325
5326 static void si_fini_pg(struct radeon_device *rdev)
5327 {
5328         bool has_pg = false;
5329
5330         /* only cape verde supports PG */
5331         if (rdev->family == CHIP_VERDE)
5332                 has_pg = true;
5333
5334         if (has_pg) {
5335                 si_enable_dma_pg(rdev, false);
5336                 si_enable_gfx_cgpg(rdev, false);
5337         }
5338 }
5339
5340 /*
5341  * RLC
5342  */
5343 void si_rlc_fini(struct radeon_device *rdev)
5344 {
5345         int r;
5346
5347         /* save restore block */
5348         if (rdev->rlc.save_restore_obj) {
5349                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5350                 if (unlikely(r != 0))
5351                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
5352                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
5353                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5354
5355                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
5356                 rdev->rlc.save_restore_obj = NULL;
5357         }
5358
5359         /* clear state block */
5360         if (rdev->rlc.clear_state_obj) {
5361                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5362                 if (unlikely(r != 0))
5363                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
5364                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
5365                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5366
5367                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
5368                 rdev->rlc.clear_state_obj = NULL;
5369         }
5370 }
5371
/* Sentinel written after the last clear-state header entry in si_rlc_init(). */
#define RLC_CLEAR_STATE_END_MARKER          0x00000001
5373
/*
 * si_rlc_init - allocate and fill the RLC save/restore and clear-state buffers
 * @rdev: radeon device
 *
 * Creates and pins two VRAM buffer objects: a one-page save/restore
 * block (filled with verde_rlc_save_restore_register_list on Cape
 * Verde only) and a clear-state block built from si_cs_data.  The
 * clear-state layout is: one dword holding the upper 32 bits of the
 * payload base address, then per-section triplets (lower address,
 * register offset, 0x08000000 | byte length), an end marker, and the
 * register payload itself starting at reg_list_blk_index dwords.
 *
 * Returns 0 on success or a negative error code; on failure all
 * partially-created objects are torn down via si_rlc_fini().
 */
int si_rlc_init(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	void *ptr;
	u32 dws, data, i, j, k, reg_num;
	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
	u64 reg_list_mc_addr;
	const struct cs_section_def *cs_data = si_cs_data;
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* only Cape Verde has save/restore register data to upload */
	if (rdev->family == CHIP_VERDE) {
		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&ptr);
		if (r) {
			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
			si_rlc_fini(rdev);
		return r;
		}
		rdev->rlc.sr_ptr = ptr;
		/* write the sr buffer */
		dst_ptr = rdev->rlc.sr_ptr;
		for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
			dst_ptr[i] = verde_rlc_save_restore_register_list[i];
		}
		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
	}
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);

	/* clear state block */
	/* count sections to size the buffer: 3 header dwords per section,
	 * plus 2 (upper-address dword and end marker), plus the payload */
	reg_list_num = 0;
	dws = 0;
	for (i = 0; cs_data[i].section != NULL; i++) {
		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
			reg_list_num++;
			dws += cs_data[i].section[j].reg_count;
		}
	}
	reg_list_blk_index = (3 * reg_list_num + 2);
	dws += reg_list_blk_index;

	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	if (r) {

		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&ptr);
	if (r) {
		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}
	rdev->rlc.cs_ptr = ptr;
	/* set up the cs buffer */
	dst_ptr = rdev->rlc.cs_ptr;
	reg_list_hdr_blk_index = 0;
	/* payload starts right after the header block */
	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
	data = upper_32_bits(reg_list_mc_addr);
	dst_ptr[reg_list_hdr_blk_index] = data;
	reg_list_hdr_blk_index++;
	for (i = 0; cs_data[i].section != NULL; i++) {
		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
			reg_num = cs_data[i].section[j].reg_count;
			/* header: lower 32 bits of payload address */
			data = reg_list_mc_addr & 0xffffffff;
			dst_ptr[reg_list_hdr_blk_index] = data;
			reg_list_hdr_blk_index++;

			/* header: register offset in bytes */
			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
			dst_ptr[reg_list_hdr_blk_index] = data;
			reg_list_hdr_blk_index++;

			/* header: flag | payload length in bytes */
			data = 0x08000000 | (reg_num * 4);
			dst_ptr[reg_list_hdr_blk_index] = data;
			reg_list_hdr_blk_index++;

			/* copy this section's register values */
			for (k = 0; k < reg_num; k++) {
				data = cs_data[i].section[j].extent[k];
				dst_ptr[reg_list_blk_index + k] = data;
			}
			reg_list_mc_addr += reg_num * 4;
			reg_list_blk_index += reg_num;
		}
	}
	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;

	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);

	return 0;
}
5505
5506 static void si_rlc_reset(struct radeon_device *rdev)
5507 {
5508         u32 tmp = RREG32(GRBM_SOFT_RESET);
5509
5510         tmp |= SOFT_RESET_RLC;
5511         WREG32(GRBM_SOFT_RESET, tmp);
5512         DRM_UDELAY(50);
5513         tmp &= ~SOFT_RESET_RLC;
5514         WREG32(GRBM_SOFT_RESET, tmp);
5515         DRM_UDELAY(50);
5516 }
5517
/* Halt the RLC (clearing RLC_CNTL clears RLC_ENABLE), mask the GUI-idle
 * interrupt and wait for the serdes masters to go idle. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5526
/* Start the RLC, unmask the GUI-idle interrupt and give the RLC 50us
 * to come up. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	DRM_UDELAY(50);
}
5535
5536 static bool si_lbpw_supported(struct radeon_device *rdev)
5537 {
5538         u32 tmp;
5539
5540         /* Enable LBPW only for DDR3 */
5541         tmp = RREG32(MC_SEQ_MISC0);
5542         if ((tmp & 0xF0000000) == 0xB0000000)
5543                 return true;
5544         return false;
5545 }
5546
5547 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5548 {
5549         u32 tmp;
5550
5551         tmp = RREG32(RLC_LB_CNTL);
5552         if (enable)
5553                 tmp |= LOAD_BALANCE_ENABLE;
5554         else
5555                 tmp &= ~LOAD_BALANCE_ENABLE;
5556         WREG32(RLC_LB_CNTL, tmp);
5557
5558         if (!enable) {
5559                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5560                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5561         }
5562 }
5563
/*
 * si_rlc_resume - reset the RLC and upload its microcode
 * @rdev: radeon device
 *
 * Stops and soft-resets the RLC, initializes power/clock gating,
 * clears the RLC configuration registers, uploads the big-endian
 * RLC firmware word by word, enables load balancing where supported
 * and restarts the RLC.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear the ring-list and load-balance configuration */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the firmware: address/data register pair, one dword
	 * at a time, converting from the firmware's big-endian layout */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5603
5604 static void si_enable_interrupts(struct radeon_device *rdev)
5605 {
5606         u32 ih_cntl = RREG32(IH_CNTL);
5607         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5608
5609         ih_cntl |= ENABLE_INTR;
5610         ih_rb_cntl |= IH_RB_ENABLE;
5611         WREG32(IH_CNTL, ih_cntl);
5612         WREG32(IH_RB_CNTL, ih_rb_cntl);
5613         rdev->ih.enabled = true;
5614 }
5615
5616 static void si_disable_interrupts(struct radeon_device *rdev)
5617 {
5618         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5619         u32 ih_cntl = RREG32(IH_CNTL);
5620
5621         ih_rb_cntl &= ~IH_RB_ENABLE;
5622         ih_cntl &= ~ENABLE_INTR;
5623         WREG32(IH_RB_CNTL, ih_rb_cntl);
5624         WREG32(IH_CNTL, ih_cntl);
5625         /* set rptr, wptr to 0 */
5626         WREG32(IH_RB_RPTR, 0);
5627         WREG32(IH_RB_WPTR, 0);
5628         rdev->ih.enabled = false;
5629         rdev->ih.rptr = 0;
5630 }
5631
/*
 * si_disable_interrupt_state - force every interrupt source to disabled
 * @rdev: radeon device
 *
 * Masks CP ring interrupts, DMA traps, GRBM interrupts, per-CRTC
 * vblank/pageflip interrupts (only for CRTCs the ASIC actually has)
 * and, on ASICs with display, the DAC autodetect and HPD interrupts
 * (preserving only each HPD line's polarity bit).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* clear the trap-enable bit on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* per-CRTC interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* per-CRTC pageflip (GRPH) interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep only the polarity bit of each HPD line */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5687
/*
 * si_irq_init - bring up the interrupt handler (IH) ring
 * @rdev: radeon device
 *
 * Allocates the IH ring, loads the RLC, programs the IH ring buffer
 * (base, size, optional writeback) and IH control registers, forces
 * all interrupt sources off, enables PCI bus mastering and finally
 * turns the IH on.
 *
 * Returns 0 on success or the error from ring allocation / RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5758
/**
 * si_irq_set - program the SI interrupt enable registers from driver state
 * @rdev: radeon device
 *
 * Builds the enable masks for CP rings 0-2, both DMA engines, up to six
 * CRTC vblank sources, six HPD (hotplug detect) pins and the thermal
 * interrupt from the software state in rdev->irq, then writes them all
 * to the hardware in one pass.
 *
 * Returns 0 on success, -EINVAL if no IRQ handler has been installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		DRM_ERROR("Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* Start each HPD mask from the current register value with the
	 * enable bit cleared; the per-pin loop below re-sets it on demand.
	 * Skipped entirely on parts without display hardware. */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	/* Likewise read-modify for the two DMA engines with TRAP_ENABLE off */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* and for the thermal interrupt with both direction masks off */
	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is wanted either for a pending drm vblank event or a
	 * pending page flip on the corresponding crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	/* hotplug detect enables per connector pin */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* now commit everything to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* CRTC registers exist in pairs; only touch the ones this asic has */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* grphN are all 0 here: pageflip interrupts are not enabled from
	 * this function, the registers are just cleared */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5927
5928 static inline void si_irq_ack(struct radeon_device *rdev)
5929 {
5930         u32 tmp;
5931
5932         if (ASIC_IS_NODCE(rdev))
5933                 return;
5934
5935         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5936         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5937         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5938         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5939         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5940         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5941         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5942         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5943         if (rdev->num_crtc >= 4) {
5944                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5945                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5946         }
5947         if (rdev->num_crtc >= 6) {
5948                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5949                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5950         }
5951
5952         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5953                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5954         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5955                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5956         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5957                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5958         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5959                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5960         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5961                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5962         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5963                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5964
5965         if (rdev->num_crtc >= 4) {
5966                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5967                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5968                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5969                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5970                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5971                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5972                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5973                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5974                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5975                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5976                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5977                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5978         }
5979
5980         if (rdev->num_crtc >= 6) {
5981                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5982                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5983                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5984                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5985                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5986                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5987                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5988                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5989                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5990                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5991                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5992                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5993         }
5994
5995         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5996                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5997                 tmp |= DC_HPDx_INT_ACK;
5998                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5999         }
6000         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6001                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6002                 tmp |= DC_HPDx_INT_ACK;
6003                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6004         }
6005         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6006                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6007                 tmp |= DC_HPDx_INT_ACK;
6008                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6009         }
6010         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6011                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6012                 tmp |= DC_HPDx_INT_ACK;
6013                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6014         }
6015         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6016                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6017                 tmp |= DC_HPDx_INT_ACK;
6018                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6019         }
6020         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6021                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6022                 tmp |= DC_HPDx_INT_ACK;
6023                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6024         }
6025 }
6026
/**
 * si_irq_disable - turn interrupts off and quiesce pending sources
 * @rdev: radeon device
 *
 * Order matters: disable delivery first, give in-flight interrupts 1 ms
 * to land, ack whatever is pending, then force all enable registers to
 * the disabled state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	DRM_MDELAY(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6035
/**
 * si_irq_suspend - disable interrupts and stop the RLC for suspend
 * @rdev: radeon device
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6041
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon device
 *
 * Suspends interrupt delivery, then frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6047
6048 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6049 {
6050         u32 wptr, tmp;
6051
6052         if (rdev->wb.enabled)
6053                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6054         else
6055                 wptr = RREG32(IH_RB_WPTR);
6056
6057         if (wptr & RB_OVERFLOW) {
6058                 /* When a ring buffer overflow happen start parsing interrupt
6059                  * from the last not overwritten vector (wptr + 16). Hopefully
6060                  * this should allow us to catchup.
6061                  */
6062                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6063                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6064                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6065                 tmp = RREG32(IH_RB_CNTL);
6066                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6067                 WREG32(IH_RB_CNTL, tmp);
6068         }
6069         return (wptr & rdev->ih.ptr_mask);
6070 }
6071
6072 /*        SI IV Ring
6073  * Each IV ring entry is 128 bits:
6074  * [7:0]    - interrupt source id
6075  * [31:8]   - reserved
6076  * [59:32]  - interrupt source data
6077  * [63:60]  - reserved
6078  * [71:64]  - RINGID
6079  * [79:72]  - VMID
6080  * [127:80] - reserved
6081  */
6082 irqreturn_t si_irq_process(struct radeon_device *rdev)
6083 {
6084         u32 wptr;
6085         u32 rptr;
6086         u32 src_id, src_data, ring_id;
6087         u32 ring_index;
6088         bool queue_hotplug = false;
6089         bool queue_thermal = false;
6090         u32 status, addr;
6091
6092         if (!rdev->ih.enabled || rdev->shutdown)
6093                 return IRQ_NONE;
6094
6095         wptr = si_get_ih_wptr(rdev);
6096
6097 restart_ih:
6098         /* is somebody else already processing irqs? */
6099         if (atomic_xchg(&rdev->ih.lock, 1))
6100                 return IRQ_NONE;
6101
6102         rptr = rdev->ih.rptr;
6103         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6104
6105         /* Order reading of wptr vs. reading of IH ring data */
6106         cpu_lfence();
6107
6108         /* display interrupts */
6109         si_irq_ack(rdev);
6110
6111         while (rptr != wptr) {
6112                 /* wptr/rptr are in bytes! */
6113                 ring_index = rptr / 4;
6114                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6115                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6116                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6117
6118                 switch (src_id) {
6119                 case 1: /* D1 vblank/vline */
6120                         switch (src_data) {
6121                         case 0: /* D1 vblank */
6122                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6123                                         if (rdev->irq.crtc_vblank_int[0]) {
6124                                                 drm_handle_vblank(rdev->ddev, 0);
6125                                                 rdev->pm.vblank_sync = true;
6126                                                 DRM_WAKEUP(&rdev->irq.vblank_queue);
6127                                         }
6128                                         if (atomic_read(&rdev->irq.pflip[0]))
6129                                                 radeon_crtc_handle_flip(rdev, 0);
6130                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6131                                         DRM_DEBUG("IH: D1 vblank\n");
6132                                 }
6133                                 break;
6134                         case 1: /* D1 vline */
6135                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6136                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6137                                         DRM_DEBUG("IH: D1 vline\n");
6138                                 }
6139                                 break;
6140                         default:
6141                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6142                                 break;
6143                         }
6144                         break;
6145                 case 2: /* D2 vblank/vline */
6146                         switch (src_data) {
6147                         case 0: /* D2 vblank */
6148                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6149                                         if (rdev->irq.crtc_vblank_int[1]) {
6150                                                 drm_handle_vblank(rdev->ddev, 1);
6151                                                 rdev->pm.vblank_sync = true;
6152                                                 DRM_WAKEUP(&rdev->irq.vblank_queue);
6153                                         }
6154                                         if (atomic_read(&rdev->irq.pflip[1]))
6155                                                 radeon_crtc_handle_flip(rdev, 1);
6156                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6157                                         DRM_DEBUG("IH: D2 vblank\n");
6158                                 }
6159                                 break;
6160                         case 1: /* D2 vline */
6161                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6162                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6163                                         DRM_DEBUG("IH: D2 vline\n");
6164                                 }
6165                                 break;
6166                         default:
6167                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6168                                 break;
6169                         }
6170                         break;
6171                 case 3: /* D3 vblank/vline */
6172                         switch (src_data) {
6173                         case 0: /* D3 vblank */
6174                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6175                                         if (rdev->irq.crtc_vblank_int[2]) {
6176                                                 drm_handle_vblank(rdev->ddev, 2);
6177                                                 rdev->pm.vblank_sync = true;
6178                                                 DRM_WAKEUP(&rdev->irq.vblank_queue);
6179                                         }
6180                                         if (atomic_read(&rdev->irq.pflip[2]))
6181                                                 radeon_crtc_handle_flip(rdev, 2);
6182                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6183                                         DRM_DEBUG("IH: D3 vblank\n");
6184                                 }
6185                                 break;
6186                         case 1: /* D3 vline */
6187                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6188                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6189                                         DRM_DEBUG("IH: D3 vline\n");
6190                                 }
6191                                 break;
6192                         default:
6193                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6194                                 break;
6195                         }
6196                         break;
6197                 case 4: /* D4 vblank/vline */
6198                         switch (src_data) {
6199                         case 0: /* D4 vblank */
6200                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6201                                         if (rdev->irq.crtc_vblank_int[3]) {
6202                                                 drm_handle_vblank(rdev->ddev, 3);
6203                                                 rdev->pm.vblank_sync = true;
6204                                                 DRM_WAKEUP(&rdev->irq.vblank_queue);
6205                                         }
6206                                         if (atomic_read(&rdev->irq.pflip[3]))
6207                                                 radeon_crtc_handle_flip(rdev, 3);
6208                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6209                                         DRM_DEBUG("IH: D4 vblank\n");
6210                                 }
6211                                 break;
6212                         case 1: /* D4 vline */
6213                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6214                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6215                                         DRM_DEBUG("IH: D4 vline\n");
6216                                 }
6217                                 break;
6218                         default:
6219                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6220                                 break;
6221                         }
6222                         break;
6223                 case 5: /* D5 vblank/vline */
6224                         switch (src_data) {
6225                         case 0: /* D5 vblank */
6226                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6227                                         if (rdev->irq.crtc_vblank_int[4]) {
6228                                                 drm_handle_vblank(rdev->ddev, 4);
6229                                                 rdev->pm.vblank_sync = true;
6230                                                 DRM_WAKEUP(&rdev->irq.vblank_queue);
6231                                         }
6232                                         if (atomic_read(&rdev->irq.pflip[4]))
6233                                                 radeon_crtc_handle_flip(rdev, 4);
6234                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6235                                         DRM_DEBUG("IH: D5 vblank\n");
6236                                 }
6237                                 break;
6238                         case 1: /* D5 vline */
6239                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6240                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6241                                         DRM_DEBUG("IH: D5 vline\n");
6242                                 }
6243                                 break;
6244                         default:
6245                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6246                                 break;
6247                         }
6248                         break;
6249                 case 6: /* D6 vblank/vline */
6250                         switch (src_data) {
6251                         case 0: /* D6 vblank */
6252                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6253                                         if (rdev->irq.crtc_vblank_int[5]) {
6254                                                 drm_handle_vblank(rdev->ddev, 5);
6255                                                 rdev->pm.vblank_sync = true;
6256                                                 DRM_WAKEUP(&rdev->irq.vblank_queue);
6257                                         }
6258                                         if (atomic_read(&rdev->irq.pflip[5]))
6259                                                 radeon_crtc_handle_flip(rdev, 5);
6260                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6261                                         DRM_DEBUG("IH: D6 vblank\n");
6262                                 }
6263                                 break;
6264                         case 1: /* D6 vline */
6265                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6266                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6267                                         DRM_DEBUG("IH: D6 vline\n");
6268                                 }
6269                                 break;
6270                         default:
6271                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6272                                 break;
6273                         }
6274                         break;
6275                 case 42: /* HPD hotplug */
6276                         switch (src_data) {
6277                         case 0:
6278                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6279                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6280                                         queue_hotplug = true;
6281                                         DRM_DEBUG("IH: HPD1\n");
6282                                 }
6283                                 break;
6284                         case 1:
6285                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6286                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6287                                         queue_hotplug = true;
6288                                         DRM_DEBUG("IH: HPD2\n");
6289                                 }
6290                                 break;
6291                         case 2:
6292                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6293                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6294                                         queue_hotplug = true;
6295                                         DRM_DEBUG("IH: HPD3\n");
6296                                 }
6297                                 break;
6298                         case 3:
6299                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6300                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6301                                         queue_hotplug = true;
6302                                         DRM_DEBUG("IH: HPD4\n");
6303                                 }
6304                                 break;
6305                         case 4:
6306                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6307                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6308                                         queue_hotplug = true;
6309                                         DRM_DEBUG("IH: HPD5\n");
6310                                 }
6311                                 break;
6312                         case 5:
6313                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6314                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6315                                         queue_hotplug = true;
6316                                         DRM_DEBUG("IH: HPD6\n");
6317                                 }
6318                                 break;
6319                         default:
6320                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6321                                 break;
6322                         }
6323                         break;
6324                 case 146:
6325                 case 147:
6326                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6327                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6328                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6329                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6330                                 addr);
6331                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6332                                 status);
6333                         si_vm_decode_fault(rdev, status, addr);
6334                         /* reset addr and status */
6335                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6336                         break;
6337                 case 176: /* RINGID0 CP_INT */
6338                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6339                         break;
6340                 case 177: /* RINGID1 CP_INT */
6341                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6342                         break;
6343                 case 178: /* RINGID2 CP_INT */
6344                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6345                         break;
6346                 case 181: /* CP EOP event */
6347                         DRM_DEBUG("IH: CP EOP\n");
6348                         switch (ring_id) {
6349                         case 0:
6350                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6351                                 break;
6352                         case 1:
6353                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6354                                 break;
6355                         case 2:
6356                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6357                                 break;
6358                         }
6359                         break;
6360                 case 224: /* DMA trap event */
6361                         DRM_DEBUG("IH: DMA trap\n");
6362                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6363                         break;
6364                 case 230: /* thermal low to high */
6365                         DRM_DEBUG("IH: thermal low to high\n");
6366                         rdev->pm.dpm.thermal.high_to_low = false;
6367                         queue_thermal = true;
6368                         break;
6369                 case 231: /* thermal high to low */
6370                         DRM_DEBUG("IH: thermal high to low\n");
6371                         rdev->pm.dpm.thermal.high_to_low = true;
6372                         queue_thermal = true;
6373                         break;
6374                 case 233: /* GUI IDLE */
6375                         DRM_DEBUG("IH: GUI idle\n");
6376                         break;
6377                 case 244: /* DMA trap event */
6378                         DRM_DEBUG("IH: DMA1 trap\n");
6379                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6380                         break;
6381                 default:
6382                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6383                         break;
6384                 }
6385
6386                 /* wptr/rptr are in bytes! */
6387                 rptr += 16;
6388                 rptr &= rdev->ih.ptr_mask;
6389         }
6390         if (queue_hotplug)
6391                 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
6392         if (queue_thermal && rdev->pm.dpm_enabled)
6393                 taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
6394
6395         rdev->ih.rptr = rptr;
6396         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6397         atomic_set(&rdev->ih.lock, 0);
6398
6399         /* make sure wptr hasn't changed while processing */
6400         wptr = si_get_ih_wptr(rdev);
6401         if (wptr != rptr)
6402                 goto restart_ih;
6403
6404         return IRQ_HANDLED;
6405 }
6406
6407 /**
6408  * si_copy_dma - copy pages using the DMA engine
6409  *
6410  * @rdev: radeon_device pointer
6411  * @src_offset: src GPU address
6412  * @dst_offset: dst GPU address
6413  * @num_gpu_pages: number of GPU pages to xfer
6414  * @fence: radeon fence object
6415  *
6416  * Copy GPU paging using the DMA engine (SI).
6417  * Used by the radeon ttm implementation to move pages if
6418  * registered as the asic copy callback.
6419  */
6420 int si_copy_dma(struct radeon_device *rdev,
6421                 uint64_t src_offset, uint64_t dst_offset,
6422                 unsigned num_gpu_pages,
6423                 struct radeon_fence **fence)
6424 {
6425         struct radeon_semaphore *sem = NULL;
6426         int ring_index = rdev->asic->copy.dma_ring_index;
6427         struct radeon_ring *ring = &rdev->ring[ring_index];
6428         u32 size_in_bytes, cur_size_in_bytes;
6429         int i, num_loops;
6430         int r = 0;
6431
6432         r = radeon_semaphore_create(rdev, &sem);
6433         if (r) {
6434                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6435                 return r;
6436         }
6437
6438         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6439         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6440         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6441         if (r) {
6442                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6443                 radeon_semaphore_free(rdev, &sem, NULL);
6444                 return r;
6445         }
6446
6447         if (radeon_fence_need_sync(*fence, ring->idx)) {
6448                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6449                                             ring->idx);
6450                 radeon_fence_note_sync(*fence, ring->idx);
6451         } else {
6452                 radeon_semaphore_free(rdev, &sem, NULL);
6453         }
6454
6455         for (i = 0; i < num_loops; i++) {
6456                 cur_size_in_bytes = size_in_bytes;
6457                 if (cur_size_in_bytes > 0xFFFFF)
6458                         cur_size_in_bytes = 0xFFFFF;
6459                 size_in_bytes -= cur_size_in_bytes;
6460                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6461                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6462                 radeon_ring_write(ring, src_offset & 0xffffffff);
6463                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6464                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6465                 src_offset += cur_size_in_bytes;
6466                 dst_offset += cur_size_in_bytes;
6467         }
6468
6469         r = radeon_fence_emit(rdev, fence, ring->idx);
6470         if (r) {
6471                 radeon_ring_unlock_undo(rdev, ring);
6472                 return r;
6473         }
6474
6475         radeon_ring_unlock_commit(rdev, ring);
6476         radeon_semaphore_free(rdev, &sem, *fence);
6477
6478         return r;
6479 }
6480
6481 /*
6482  * startup/shutdown callbacks
6483  */
/**
 * si_startup - program the asic to a useable state
 *
 * @rdev: radeon_device pointer
 *
 * Brings the hardware up: programs the MC, loads microcode, enables
 * the GART, allocates RLC and writeback buffers, starts the fence
 * driver on every ring, installs interrupts and initializes the CP,
 * DMA and (when present) UVD rings.  Called from both si_init() and
 * si_resume(); the sequence below is order-dependent.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	si_mc_program(rdev);

	/* fetch any firmware images that are not already resident */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring we intend to use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on failure just disable its ring, don't abort */
	if (rdev->has_uvd) {
		r = rv770_uvd_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the three gfx/compute CP rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	/* and the two async DMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD resume failed above; skip it */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size,
					     R600_WB_UVD_RPTR_OFFSET,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     0, 0xfffff, RADEON_CP_PACKET2);
			if (!r)
				r = r600_uvd_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
6665
6666 int si_resume(struct radeon_device *rdev)
6667 {
6668         int r;
6669
6670         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6671          * posting will perform necessary task to bring back GPU into good
6672          * shape.
6673          */
6674         /* post card */
6675         atom_asic_init(rdev->mode_info.atom_context);
6676
6677         /* init golden registers */
6678         si_init_golden_registers(rdev);
6679
6680         rdev->accel_working = true;
6681         r = si_startup(rdev);
6682         if (r) {
6683                 DRM_ERROR("si startup failed on resume\n");
6684                 rdev->accel_working = false;
6685                 return r;
6686         }
6687
6688         return r;
6689
6690 }
6691
/**
 * si_suspend - quiesce the asic
 *
 * @rdev: radeon_device pointer
 *
 * Stops the engines in roughly the reverse order of si_startup():
 * VM manager, gfx CP, DMA, UVD, interrupts, writeback and finally
 * the GART.  The ordering is significant; do not rearrange.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);	/* halt the gfx command processor */
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6706
6707 /* Plan is to move initialization in that function and use
6708  * helper function so that radeon_device_init pretty much
6709  * do nothing more than calling asic specific function. This
6710  * should also allow to remove a bunch of callback function
6711  * like vram_info.
6712  */
6713 int si_init(struct radeon_device *rdev)
6714 {
6715         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6716         int r;
6717
6718         /* Read BIOS */
6719         if (!radeon_get_bios(rdev)) {
6720                 if (ASIC_IS_AVIVO(rdev))
6721                         return -EINVAL;
6722         }
6723         /* Must be an ATOMBIOS */
6724         if (!rdev->is_atom_bios) {
6725                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6726                 return -EINVAL;
6727         }
6728         r = radeon_atombios_init(rdev);
6729         if (r)
6730                 return r;
6731
6732         /* Post card if necessary */
6733         if (!radeon_card_posted(rdev)) {
6734                 if (!rdev->bios) {
6735                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6736                         return -EINVAL;
6737                 }
6738                 DRM_INFO("GPU not posted. posting now...\n");
6739                 atom_asic_init(rdev->mode_info.atom_context);
6740         }
6741         /* init golden registers */
6742         si_init_golden_registers(rdev);
6743         /* Initialize scratch registers */
6744         si_scratch_init(rdev);
6745         /* Initialize surface registers */
6746         radeon_surface_init(rdev);
6747         /* Initialize clocks */
6748         radeon_get_clock_info(rdev->ddev);
6749
6750         /* Fence driver */
6751         r = radeon_fence_driver_init(rdev);
6752         if (r)
6753                 return r;
6754
6755         /* initialize memory controller */
6756         r = si_mc_init(rdev);
6757         if (r)
6758                 return r;
6759         /* Memory manager */
6760         r = radeon_bo_init(rdev);
6761         if (r)
6762                 return r;
6763
6764         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6765         ring->ring_obj = NULL;
6766         r600_ring_init(rdev, ring, 1024 * 1024);
6767
6768         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6769         ring->ring_obj = NULL;
6770         r600_ring_init(rdev, ring, 1024 * 1024);
6771
6772         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6773         ring->ring_obj = NULL;
6774         r600_ring_init(rdev, ring, 1024 * 1024);
6775
6776         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6777         ring->ring_obj = NULL;
6778         r600_ring_init(rdev, ring, 64 * 1024);
6779
6780         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6781         ring->ring_obj = NULL;
6782         r600_ring_init(rdev, ring, 64 * 1024);
6783
6784         if (rdev->has_uvd) {
6785                 r = radeon_uvd_init(rdev);
6786                 if (!r) {
6787                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6788                         ring->ring_obj = NULL;
6789                         r600_ring_init(rdev, ring, 4096);
6790                 }
6791         }
6792
6793         rdev->ih.ring_obj = NULL;
6794         r600_ih_ring_init(rdev, 64 * 1024);
6795
6796         r = r600_pcie_gart_init(rdev);
6797         if (r)
6798                 return r;
6799
6800         rdev->accel_working = true;
6801         r = si_startup(rdev);
6802         if (r) {
6803                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6804                 si_cp_fini(rdev);
6805                 cayman_dma_fini(rdev);
6806                 si_irq_fini(rdev);
6807                 si_rlc_fini(rdev);
6808                 radeon_wb_fini(rdev);
6809                 radeon_ib_pool_fini(rdev);
6810                 radeon_vm_manager_fini(rdev);
6811                 radeon_irq_kms_fini(rdev);
6812                 si_pcie_gart_fini(rdev);
6813                 rdev->accel_working = false;
6814         }
6815
6816         /* Don't start up if the MC ucode is missing.
6817          * The default clocks and voltages before the MC ucode
6818          * is loaded are not suffient for advanced operations.
6819          */
6820         if (!rdev->mc_fw) {
6821                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6822                 return -EINVAL;
6823         }
6824
6825         return 0;
6826 }
6827
/**
 * si_fini - final asic teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything si_init() and si_startup() created: engines,
 * clock/power gating, buffers, VM, interrupts, UVD, GART, fences and
 * the BIOS copy.  The ordering mirrors the init sequence in reverse
 * and must be preserved.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	si_fini_cg(rdev);
	si_fini_pg(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	drm_free(rdev->bios, M_DRM);
	rdev->bios = NULL;
}
6854
6855 /**
6856  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6857  *
6858  * @rdev: radeon_device pointer
6859  *
6860  * Fetches a GPU clock counter snapshot (SI).
6861  * Returns the 64 bit clock counter snapshot.
6862  */
6863 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6864 {
6865         uint64_t clock;
6866
6867         spin_lock(&rdev->gpu_clock_mutex);
6868         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6869         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6870                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6871         spin_unlock(&rdev->gpu_clock_mutex);
6872         return clock;
6873 }
6874
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (kHz); 0 together with @dclk == 0
 *        puts the PLL to sleep in bypass mode
 * @dclk: requested UVD decode clock (kHz)
 *
 * Switches VCLK/DCLK to the bypass source, reprograms the UPLL
 * dividers for the requested frequencies and switches back.  The
 * exact write sequence and the delays are mandated by the hardware;
 * do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	DRM_MDELAY(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects VCO range depending on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	DRM_MDELAY(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	DRM_MDELAY(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	DRM_MDELAY(100);

	return 0;
}
6965
6966 static struct pci_dev dev_to_pcidev(device_t dev)
6967 {
6968     struct pci_dev pdev;
6969     pdev.dev = dev;
6970     return pdev;
6971 }
6972
/**
 * si_pcie_gen3_enable - try to bring the PCIe link up to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the root port and the GPU support 5.0 or 8.0 GT/s, retrains
 * the link to the highest common speed.  For gen3 this includes the
 * equalization retry dance mandated by the PCIe spec: quiesce, redo
 * EQ, restore LNKCTL/LNKCTL2 on both ends, up to 10 attempts.
 * Silently returns when the option is disabled, the GPU is an IGP or
 * not PCIe, or capability lookups fail.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
#if 0
	struct pci_dev *root = rdev->dev->bus->self;
#else
	device_t root = device_get_parent(rdev->dev);
#endif
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;
	struct pci_dev root_pdev = dev_to_pcidev(root);
	struct pci_dev pdev = dev_to_pcidev(rdev->dev);

	/* radeon.pcie_gen2=0 disables any speed change */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offset on both ends of the link */
	bridge_pos = pci_get_pciecap_ptr(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL (for the HAWD bit) on both ends */
			pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the widest detected link if possible */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				DRM_MDELAY(100);

				/* linkctl */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (low nibble) */
	pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to complete the speed change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		DRM_UDELAY(1);
	}
}
7135
/**
 * si_program_aspm - program PCIE ASPM (Active State Power Management) for SI
 *
 * @rdev: radeon_device pointer
 *
 * Configures the PCIE link controller, the PIF PHY0/PHY1 power-down
 * registers and (when CLKREQ# is usable) the bypass clock sources so the
 * link can enter the L0s/L1 low-power states.  Note that the four
 * disable_* locals below are hard-coded false, so every enable path is
 * taken unless a chip-family check says otherwise; they exist as manual
 * knobs for bring-up/debug.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* ASPM globally disabled (radeon.aspm=0, presumably the module
	 * parameter — set elsewhere in the driver) */
	if (radeon_aspm == 0)
		return;

	/* ASPM is a PCIE concept; nothing to do for non-PCIE GPUs */
	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted (0x24) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the LC_CNTL value: clear both inactivity timers, then set
	 * the L0s timer when L0s is enabled; LC_PMI_TO_L1_DIS is only kept
	 * on the disable_l1 path below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set the L1 inactivity timer and allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power off while the link is in
			 * L1/TXS2 — programmed identically for both PHYs */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* on everything but Oland/Hainan, zero the PLL ramp-up
			 * times for all four lanes of both PHYs */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time: 5 on Oland/Hainan, 0 elsewhere */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
#ifdef MN_TODO
				/* upstream code: probe the root port's link
				 * capabilities for clock power management
				 * (CLKREQ#) support */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				/* NOTE(review): MN_TODO looks like a porting
				 * placeholder that is never defined, so CLKREQ
				 * detection is stubbed out and the whole
				 * clk_req_support block below is currently
				 * dead code — confirm once the capability
				 * helpers are ported */
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow PHY power-down in L1/L2-L3 and switch
				 * thermal/misc/bypass clocks to sources that
				 * keep running while the reference clock is
				 * gated by CLKREQ# */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit LC_CNTL with LC_PMI_TO_L1_DIS still set
		 * and no L1 inactivity timer */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light-sleep in the BIF slave/master/replay paths */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the negotiated N_FTS field is saturated and the link is
		 * reversed in both directions, L0s cannot be used — clear the
		 * L0s inactivity timer we set above */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}