/*
 * drm/radeon: Disable acceleration on si
 * sys/dev/drm/radeon/si.c (dragonfly.git)
 */
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "radeon_audio.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
45
46 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
47 MODULE_FIRMWARE("radeon/tahiti_me.bin");
48 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
49 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
50 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
53
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
69
70 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
71 MODULE_FIRMWARE("radeon/VERDE_me.bin");
72 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
75 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
76 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
77
78 MODULE_FIRMWARE("radeon/verde_pfp.bin");
79 MODULE_FIRMWARE("radeon/verde_me.bin");
80 MODULE_FIRMWARE("radeon/verde_ce.bin");
81 MODULE_FIRMWARE("radeon/verde_mc.bin");
82 MODULE_FIRMWARE("radeon/verde_rlc.bin");
83 MODULE_FIRMWARE("radeon/verde_smc.bin");
84 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
85
86 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
87 MODULE_FIRMWARE("radeon/OLAND_me.bin");
88 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
89 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
91 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
92 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
93
94 MODULE_FIRMWARE("radeon/oland_pfp.bin");
95 MODULE_FIRMWARE("radeon/oland_me.bin");
96 MODULE_FIRMWARE("radeon/oland_ce.bin");
97 MODULE_FIRMWARE("radeon/oland_mc.bin");
98 MODULE_FIRMWARE("radeon/oland_rlc.bin");
99 MODULE_FIRMWARE("radeon/oland_smc.bin");
100 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
101
102 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
109
110 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
111 MODULE_FIRMWARE("radeon/hainan_me.bin");
112 MODULE_FIRMWARE("radeon/hainan_ce.bin");
113 MODULE_FIRMWARE("radeon/hainan_mc.bin");
114 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
115 MODULE_FIRMWARE("radeon/hainan_smc.bin");
116 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
117
118 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
119 static void si_pcie_gen3_enable(struct radeon_device *rdev);
120 static void si_program_aspm(struct radeon_device *rdev);
121 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
122                                          bool enable);
123 static void si_init_pg(struct radeon_device *rdev);
124 static void si_init_cg(struct radeon_device *rdev);
125 static void si_fini_pg(struct radeon_device *rdev);
126 static void si_fini_cg(struct radeon_device *rdev);
127 static void si_rlc_stop(struct radeon_device *rdev);
128
129 static const u32 verde_rlc_save_restore_register_list[] =
130 {
131         (0x8000 << 16) | (0x98f4 >> 2),
132         0x00000000,
133         (0x8040 << 16) | (0x98f4 >> 2),
134         0x00000000,
135         (0x8000 << 16) | (0xe80 >> 2),
136         0x00000000,
137         (0x8040 << 16) | (0xe80 >> 2),
138         0x00000000,
139         (0x8000 << 16) | (0x89bc >> 2),
140         0x00000000,
141         (0x8040 << 16) | (0x89bc >> 2),
142         0x00000000,
143         (0x8000 << 16) | (0x8c1c >> 2),
144         0x00000000,
145         (0x8040 << 16) | (0x8c1c >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x98f0 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0xe7c >> 2),
150         0x00000000,
151         (0x8000 << 16) | (0x9148 >> 2),
152         0x00000000,
153         (0x8040 << 16) | (0x9148 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9150 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x897c >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x8d8c >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0xac54 >> 2),
162         0X00000000,
163         0x3,
164         (0x9c00 << 16) | (0x98f8 >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9910 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9914 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x9918 >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x991c >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9920 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9924 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x9928 >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x992c >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9930 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9934 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x9938 >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x993c >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x9940 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x9944 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x9948 >> 2),
195         0x00000000,
196         (0x9c00 << 16) | (0x994c >> 2),
197         0x00000000,
198         (0x9c00 << 16) | (0x9950 >> 2),
199         0x00000000,
200         (0x9c00 << 16) | (0x9954 >> 2),
201         0x00000000,
202         (0x9c00 << 16) | (0x9958 >> 2),
203         0x00000000,
204         (0x9c00 << 16) | (0x995c >> 2),
205         0x00000000,
206         (0x9c00 << 16) | (0x9960 >> 2),
207         0x00000000,
208         (0x9c00 << 16) | (0x9964 >> 2),
209         0x00000000,
210         (0x9c00 << 16) | (0x9968 >> 2),
211         0x00000000,
212         (0x9c00 << 16) | (0x996c >> 2),
213         0x00000000,
214         (0x9c00 << 16) | (0x9970 >> 2),
215         0x00000000,
216         (0x9c00 << 16) | (0x9974 >> 2),
217         0x00000000,
218         (0x9c00 << 16) | (0x9978 >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x997c >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9980 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9984 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x9988 >> 2),
227         0x00000000,
228         (0x9c00 << 16) | (0x998c >> 2),
229         0x00000000,
230         (0x9c00 << 16) | (0x8c00 >> 2),
231         0x00000000,
232         (0x9c00 << 16) | (0x8c14 >> 2),
233         0x00000000,
234         (0x9c00 << 16) | (0x8c04 >> 2),
235         0x00000000,
236         (0x9c00 << 16) | (0x8c08 >> 2),
237         0x00000000,
238         (0x8000 << 16) | (0x9b7c >> 2),
239         0x00000000,
240         (0x8040 << 16) | (0x9b7c >> 2),
241         0x00000000,
242         (0x8000 << 16) | (0xe84 >> 2),
243         0x00000000,
244         (0x8040 << 16) | (0xe84 >> 2),
245         0x00000000,
246         (0x8000 << 16) | (0x89c0 >> 2),
247         0x00000000,
248         (0x8040 << 16) | (0x89c0 >> 2),
249         0x00000000,
250         (0x8000 << 16) | (0x914c >> 2),
251         0x00000000,
252         (0x8040 << 16) | (0x914c >> 2),
253         0x00000000,
254         (0x8000 << 16) | (0x8c20 >> 2),
255         0x00000000,
256         (0x8040 << 16) | (0x8c20 >> 2),
257         0x00000000,
258         (0x8000 << 16) | (0x9354 >> 2),
259         0x00000000,
260         (0x8040 << 16) | (0x9354 >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0x9060 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0x9364 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0x9100 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0x913c >> 2),
269         0x00000000,
270         (0x8000 << 16) | (0x90e0 >> 2),
271         0x00000000,
272         (0x8000 << 16) | (0x90e4 >> 2),
273         0x00000000,
274         (0x8000 << 16) | (0x90e8 >> 2),
275         0x00000000,
276         (0x8040 << 16) | (0x90e0 >> 2),
277         0x00000000,
278         (0x8040 << 16) | (0x90e4 >> 2),
279         0x00000000,
280         (0x8040 << 16) | (0x90e8 >> 2),
281         0x00000000,
282         (0x9c00 << 16) | (0x8bcc >> 2),
283         0x00000000,
284         (0x9c00 << 16) | (0x8b24 >> 2),
285         0x00000000,
286         (0x9c00 << 16) | (0x88c4 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x8e50 >> 2),
289         0x00000000,
290         (0x9c00 << 16) | (0x8c0c >> 2),
291         0x00000000,
292         (0x9c00 << 16) | (0x8e58 >> 2),
293         0x00000000,
294         (0x9c00 << 16) | (0x8e5c >> 2),
295         0x00000000,
296         (0x9c00 << 16) | (0x9508 >> 2),
297         0x00000000,
298         (0x9c00 << 16) | (0x950c >> 2),
299         0x00000000,
300         (0x9c00 << 16) | (0x9494 >> 2),
301         0x00000000,
302         (0x9c00 << 16) | (0xac0c >> 2),
303         0x00000000,
304         (0x9c00 << 16) | (0xac10 >> 2),
305         0x00000000,
306         (0x9c00 << 16) | (0xac14 >> 2),
307         0x00000000,
308         (0x9c00 << 16) | (0xae00 >> 2),
309         0x00000000,
310         (0x9c00 << 16) | (0xac08 >> 2),
311         0x00000000,
312         (0x9c00 << 16) | (0x88d4 >> 2),
313         0x00000000,
314         (0x9c00 << 16) | (0x88c8 >> 2),
315         0x00000000,
316         (0x9c00 << 16) | (0x88cc >> 2),
317         0x00000000,
318         (0x9c00 << 16) | (0x89b0 >> 2),
319         0x00000000,
320         (0x9c00 << 16) | (0x8b10 >> 2),
321         0x00000000,
322         (0x9c00 << 16) | (0x8a14 >> 2),
323         0x00000000,
324         (0x9c00 << 16) | (0x9830 >> 2),
325         0x00000000,
326         (0x9c00 << 16) | (0x9834 >> 2),
327         0x00000000,
328         (0x9c00 << 16) | (0x9838 >> 2),
329         0x00000000,
330         (0x9c00 << 16) | (0x9a10 >> 2),
331         0x00000000,
332         (0x8000 << 16) | (0x9870 >> 2),
333         0x00000000,
334         (0x8000 << 16) | (0x9874 >> 2),
335         0x00000000,
336         (0x8001 << 16) | (0x9870 >> 2),
337         0x00000000,
338         (0x8001 << 16) | (0x9874 >> 2),
339         0x00000000,
340         (0x8040 << 16) | (0x9870 >> 2),
341         0x00000000,
342         (0x8040 << 16) | (0x9874 >> 2),
343         0x00000000,
344         (0x8041 << 16) | (0x9870 >> 2),
345         0x00000000,
346         (0x8041 << 16) | (0x9874 >> 2),
347         0x00000000,
348         0x00000000
349 };
350
351 static const u32 tahiti_golden_rlc_registers[] =
352 {
353         0xc424, 0xffffffff, 0x00601005,
354         0xc47c, 0xffffffff, 0x10104040,
355         0xc488, 0xffffffff, 0x0100000a,
356         0xc314, 0xffffffff, 0x00000800,
357         0xc30c, 0xffffffff, 0x800000f4,
358         0xf4a8, 0xffffffff, 0x00000000
359 };
360
361 static const u32 tahiti_golden_registers[] =
362 {
363         0x9a10, 0x00010000, 0x00018208,
364         0x9830, 0xffffffff, 0x00000000,
365         0x9834, 0xf00fffff, 0x00000400,
366         0x9838, 0x0002021c, 0x00020200,
367         0xc78, 0x00000080, 0x00000000,
368         0xd030, 0x000300c0, 0x00800040,
369         0xd830, 0x000300c0, 0x00800040,
370         0x5bb0, 0x000000f0, 0x00000070,
371         0x5bc0, 0x00200000, 0x50100000,
372         0x7030, 0x31000311, 0x00000011,
373         0x277c, 0x00000003, 0x000007ff,
374         0x240c, 0x000007ff, 0x00000000,
375         0x8a14, 0xf000001f, 0x00000007,
376         0x8b24, 0xffffffff, 0x00ffffff,
377         0x8b10, 0x0000ff0f, 0x00000000,
378         0x28a4c, 0x07ffffff, 0x4e000000,
379         0x28350, 0x3f3f3fff, 0x2a00126a,
380         0x30, 0x000000ff, 0x0040,
381         0x34, 0x00000040, 0x00004040,
382         0x9100, 0x07ffffff, 0x03000000,
383         0x8e88, 0x01ff1f3f, 0x00000000,
384         0x8e84, 0x01ff1f3f, 0x00000000,
385         0x9060, 0x0000007f, 0x00000020,
386         0x9508, 0x00010000, 0x00010000,
387         0xac14, 0x00000200, 0x000002fb,
388         0xac10, 0xffffffff, 0x0000543b,
389         0xac0c, 0xffffffff, 0xa9210876,
390         0x88d0, 0xffffffff, 0x000fff40,
391         0x88d4, 0x0000001f, 0x00000010,
392         0x1410, 0x20000000, 0x20fffed8,
393         0x15c0, 0x000c0fc0, 0x000c0400
394 };
395
396 static const u32 tahiti_golden_registers2[] =
397 {
398         0xc64, 0x00000001, 0x00000001
399 };
400
401 static const u32 pitcairn_golden_rlc_registers[] =
402 {
403         0xc424, 0xffffffff, 0x00601004,
404         0xc47c, 0xffffffff, 0x10102020,
405         0xc488, 0xffffffff, 0x01000020,
406         0xc314, 0xffffffff, 0x00000800,
407         0xc30c, 0xffffffff, 0x800000a4
408 };
409
410 static const u32 pitcairn_golden_registers[] =
411 {
412         0x9a10, 0x00010000, 0x00018208,
413         0x9830, 0xffffffff, 0x00000000,
414         0x9834, 0xf00fffff, 0x00000400,
415         0x9838, 0x0002021c, 0x00020200,
416         0xc78, 0x00000080, 0x00000000,
417         0xd030, 0x000300c0, 0x00800040,
418         0xd830, 0x000300c0, 0x00800040,
419         0x5bb0, 0x000000f0, 0x00000070,
420         0x5bc0, 0x00200000, 0x50100000,
421         0x7030, 0x31000311, 0x00000011,
422         0x2ae4, 0x00073ffe, 0x000022a2,
423         0x240c, 0x000007ff, 0x00000000,
424         0x8a14, 0xf000001f, 0x00000007,
425         0x8b24, 0xffffffff, 0x00ffffff,
426         0x8b10, 0x0000ff0f, 0x00000000,
427         0x28a4c, 0x07ffffff, 0x4e000000,
428         0x28350, 0x3f3f3fff, 0x2a00126a,
429         0x30, 0x000000ff, 0x0040,
430         0x34, 0x00000040, 0x00004040,
431         0x9100, 0x07ffffff, 0x03000000,
432         0x9060, 0x0000007f, 0x00000020,
433         0x9508, 0x00010000, 0x00010000,
434         0xac14, 0x000003ff, 0x000000f7,
435         0xac10, 0xffffffff, 0x00000000,
436         0xac0c, 0xffffffff, 0x32761054,
437         0x88d4, 0x0000001f, 0x00000010,
438         0x15c0, 0x000c0fc0, 0x000c0400
439 };
440
441 static const u32 verde_golden_rlc_registers[] =
442 {
443         0xc424, 0xffffffff, 0x033f1005,
444         0xc47c, 0xffffffff, 0x10808020,
445         0xc488, 0xffffffff, 0x00800008,
446         0xc314, 0xffffffff, 0x00001000,
447         0xc30c, 0xffffffff, 0x80010014
448 };
449
450 static const u32 verde_golden_registers[] =
451 {
452         0x9a10, 0x00010000, 0x00018208,
453         0x9830, 0xffffffff, 0x00000000,
454         0x9834, 0xf00fffff, 0x00000400,
455         0x9838, 0x0002021c, 0x00020200,
456         0xc78, 0x00000080, 0x00000000,
457         0xd030, 0x000300c0, 0x00800040,
458         0xd030, 0x000300c0, 0x00800040,
459         0xd830, 0x000300c0, 0x00800040,
460         0xd830, 0x000300c0, 0x00800040,
461         0x5bb0, 0x000000f0, 0x00000070,
462         0x5bc0, 0x00200000, 0x50100000,
463         0x7030, 0x31000311, 0x00000011,
464         0x2ae4, 0x00073ffe, 0x000022a2,
465         0x2ae4, 0x00073ffe, 0x000022a2,
466         0x2ae4, 0x00073ffe, 0x000022a2,
467         0x240c, 0x000007ff, 0x00000000,
468         0x240c, 0x000007ff, 0x00000000,
469         0x240c, 0x000007ff, 0x00000000,
470         0x8a14, 0xf000001f, 0x00000007,
471         0x8a14, 0xf000001f, 0x00000007,
472         0x8a14, 0xf000001f, 0x00000007,
473         0x8b24, 0xffffffff, 0x00ffffff,
474         0x8b10, 0x0000ff0f, 0x00000000,
475         0x28a4c, 0x07ffffff, 0x4e000000,
476         0x28350, 0x3f3f3fff, 0x0000124a,
477         0x28350, 0x3f3f3fff, 0x0000124a,
478         0x28350, 0x3f3f3fff, 0x0000124a,
479         0x30, 0x000000ff, 0x0040,
480         0x34, 0x00000040, 0x00004040,
481         0x9100, 0x07ffffff, 0x03000000,
482         0x9100, 0x07ffffff, 0x03000000,
483         0x8e88, 0x01ff1f3f, 0x00000000,
484         0x8e88, 0x01ff1f3f, 0x00000000,
485         0x8e88, 0x01ff1f3f, 0x00000000,
486         0x8e84, 0x01ff1f3f, 0x00000000,
487         0x8e84, 0x01ff1f3f, 0x00000000,
488         0x8e84, 0x01ff1f3f, 0x00000000,
489         0x9060, 0x0000007f, 0x00000020,
490         0x9508, 0x00010000, 0x00010000,
491         0xac14, 0x000003ff, 0x00000003,
492         0xac14, 0x000003ff, 0x00000003,
493         0xac14, 0x000003ff, 0x00000003,
494         0xac10, 0xffffffff, 0x00000000,
495         0xac10, 0xffffffff, 0x00000000,
496         0xac10, 0xffffffff, 0x00000000,
497         0xac0c, 0xffffffff, 0x00001032,
498         0xac0c, 0xffffffff, 0x00001032,
499         0xac0c, 0xffffffff, 0x00001032,
500         0x88d4, 0x0000001f, 0x00000010,
501         0x88d4, 0x0000001f, 0x00000010,
502         0x88d4, 0x0000001f, 0x00000010,
503         0x15c0, 0x000c0fc0, 0x000c0400
504 };
505
506 static const u32 oland_golden_rlc_registers[] =
507 {
508         0xc424, 0xffffffff, 0x00601005,
509         0xc47c, 0xffffffff, 0x10104040,
510         0xc488, 0xffffffff, 0x0100000a,
511         0xc314, 0xffffffff, 0x00000800,
512         0xc30c, 0xffffffff, 0x800000f4
513 };
514
515 static const u32 oland_golden_registers[] =
516 {
517         0x9a10, 0x00010000, 0x00018208,
518         0x9830, 0xffffffff, 0x00000000,
519         0x9834, 0xf00fffff, 0x00000400,
520         0x9838, 0x0002021c, 0x00020200,
521         0xc78, 0x00000080, 0x00000000,
522         0xd030, 0x000300c0, 0x00800040,
523         0xd830, 0x000300c0, 0x00800040,
524         0x5bb0, 0x000000f0, 0x00000070,
525         0x5bc0, 0x00200000, 0x50100000,
526         0x7030, 0x31000311, 0x00000011,
527         0x2ae4, 0x00073ffe, 0x000022a2,
528         0x240c, 0x000007ff, 0x00000000,
529         0x8a14, 0xf000001f, 0x00000007,
530         0x8b24, 0xffffffff, 0x00ffffff,
531         0x8b10, 0x0000ff0f, 0x00000000,
532         0x28a4c, 0x07ffffff, 0x4e000000,
533         0x28350, 0x3f3f3fff, 0x00000082,
534         0x30, 0x000000ff, 0x0040,
535         0x34, 0x00000040, 0x00004040,
536         0x9100, 0x07ffffff, 0x03000000,
537         0x9060, 0x0000007f, 0x00000020,
538         0x9508, 0x00010000, 0x00010000,
539         0xac14, 0x000003ff, 0x000000f3,
540         0xac10, 0xffffffff, 0x00000000,
541         0xac0c, 0xffffffff, 0x00003210,
542         0x88d4, 0x0000001f, 0x00000010,
543         0x15c0, 0x000c0fc0, 0x000c0400
544 };
545
546 static const u32 hainan_golden_registers[] =
547 {
548         0x9a10, 0x00010000, 0x00018208,
549         0x9830, 0xffffffff, 0x00000000,
550         0x9834, 0xf00fffff, 0x00000400,
551         0x9838, 0x0002021c, 0x00020200,
552         0xd0c0, 0xff000fff, 0x00000100,
553         0xd030, 0x000300c0, 0x00800040,
554         0xd8c0, 0xff000fff, 0x00000100,
555         0xd830, 0x000300c0, 0x00800040,
556         0x2ae4, 0x00073ffe, 0x000022a2,
557         0x240c, 0x000007ff, 0x00000000,
558         0x8a14, 0xf000001f, 0x00000007,
559         0x8b24, 0xffffffff, 0x00ffffff,
560         0x8b10, 0x0000ff0f, 0x00000000,
561         0x28a4c, 0x07ffffff, 0x4e000000,
562         0x28350, 0x3f3f3fff, 0x00000000,
563         0x30, 0x000000ff, 0x0040,
564         0x34, 0x00000040, 0x00004040,
565         0x9100, 0x03e00000, 0x03600000,
566         0x9060, 0x0000007f, 0x00000020,
567         0x9508, 0x00010000, 0x00010000,
568         0xac14, 0x000003ff, 0x000000f1,
569         0xac10, 0xffffffff, 0x00000000,
570         0xac0c, 0xffffffff, 0x00003210,
571         0x88d4, 0x0000001f, 0x00000010,
572         0x15c0, 0x000c0fc0, 0x000c0400
573 };
574
575 static const u32 hainan_golden_registers2[] =
576 {
577         0x98f8, 0xffffffff, 0x02010001
578 };
579
580 static const u32 tahiti_mgcg_cgcg_init[] =
581 {
582         0xc400, 0xffffffff, 0xfffffffc,
583         0x802c, 0xffffffff, 0xe0000000,
584         0x9a60, 0xffffffff, 0x00000100,
585         0x92a4, 0xffffffff, 0x00000100,
586         0xc164, 0xffffffff, 0x00000100,
587         0x9774, 0xffffffff, 0x00000100,
588         0x8984, 0xffffffff, 0x06000100,
589         0x8a18, 0xffffffff, 0x00000100,
590         0x92a0, 0xffffffff, 0x00000100,
591         0xc380, 0xffffffff, 0x00000100,
592         0x8b28, 0xffffffff, 0x00000100,
593         0x9144, 0xffffffff, 0x00000100,
594         0x8d88, 0xffffffff, 0x00000100,
595         0x8d8c, 0xffffffff, 0x00000100,
596         0x9030, 0xffffffff, 0x00000100,
597         0x9034, 0xffffffff, 0x00000100,
598         0x9038, 0xffffffff, 0x00000100,
599         0x903c, 0xffffffff, 0x00000100,
600         0xad80, 0xffffffff, 0x00000100,
601         0xac54, 0xffffffff, 0x00000100,
602         0x897c, 0xffffffff, 0x06000100,
603         0x9868, 0xffffffff, 0x00000100,
604         0x9510, 0xffffffff, 0x00000100,
605         0xaf04, 0xffffffff, 0x00000100,
606         0xae04, 0xffffffff, 0x00000100,
607         0x949c, 0xffffffff, 0x00000100,
608         0x802c, 0xffffffff, 0xe0000000,
609         0x9160, 0xffffffff, 0x00010000,
610         0x9164, 0xffffffff, 0x00030002,
611         0x9168, 0xffffffff, 0x00040007,
612         0x916c, 0xffffffff, 0x00060005,
613         0x9170, 0xffffffff, 0x00090008,
614         0x9174, 0xffffffff, 0x00020001,
615         0x9178, 0xffffffff, 0x00040003,
616         0x917c, 0xffffffff, 0x00000007,
617         0x9180, 0xffffffff, 0x00060005,
618         0x9184, 0xffffffff, 0x00090008,
619         0x9188, 0xffffffff, 0x00030002,
620         0x918c, 0xffffffff, 0x00050004,
621         0x9190, 0xffffffff, 0x00000008,
622         0x9194, 0xffffffff, 0x00070006,
623         0x9198, 0xffffffff, 0x000a0009,
624         0x919c, 0xffffffff, 0x00040003,
625         0x91a0, 0xffffffff, 0x00060005,
626         0x91a4, 0xffffffff, 0x00000009,
627         0x91a8, 0xffffffff, 0x00080007,
628         0x91ac, 0xffffffff, 0x000b000a,
629         0x91b0, 0xffffffff, 0x00050004,
630         0x91b4, 0xffffffff, 0x00070006,
631         0x91b8, 0xffffffff, 0x0008000b,
632         0x91bc, 0xffffffff, 0x000a0009,
633         0x91c0, 0xffffffff, 0x000d000c,
634         0x91c4, 0xffffffff, 0x00060005,
635         0x91c8, 0xffffffff, 0x00080007,
636         0x91cc, 0xffffffff, 0x0000000b,
637         0x91d0, 0xffffffff, 0x000a0009,
638         0x91d4, 0xffffffff, 0x000d000c,
639         0x91d8, 0xffffffff, 0x00070006,
640         0x91dc, 0xffffffff, 0x00090008,
641         0x91e0, 0xffffffff, 0x0000000c,
642         0x91e4, 0xffffffff, 0x000b000a,
643         0x91e8, 0xffffffff, 0x000e000d,
644         0x91ec, 0xffffffff, 0x00080007,
645         0x91f0, 0xffffffff, 0x000a0009,
646         0x91f4, 0xffffffff, 0x0000000d,
647         0x91f8, 0xffffffff, 0x000c000b,
648         0x91fc, 0xffffffff, 0x000f000e,
649         0x9200, 0xffffffff, 0x00090008,
650         0x9204, 0xffffffff, 0x000b000a,
651         0x9208, 0xffffffff, 0x000c000f,
652         0x920c, 0xffffffff, 0x000e000d,
653         0x9210, 0xffffffff, 0x00110010,
654         0x9214, 0xffffffff, 0x000a0009,
655         0x9218, 0xffffffff, 0x000c000b,
656         0x921c, 0xffffffff, 0x0000000f,
657         0x9220, 0xffffffff, 0x000e000d,
658         0x9224, 0xffffffff, 0x00110010,
659         0x9228, 0xffffffff, 0x000b000a,
660         0x922c, 0xffffffff, 0x000d000c,
661         0x9230, 0xffffffff, 0x00000010,
662         0x9234, 0xffffffff, 0x000f000e,
663         0x9238, 0xffffffff, 0x00120011,
664         0x923c, 0xffffffff, 0x000c000b,
665         0x9240, 0xffffffff, 0x000e000d,
666         0x9244, 0xffffffff, 0x00000011,
667         0x9248, 0xffffffff, 0x0010000f,
668         0x924c, 0xffffffff, 0x00130012,
669         0x9250, 0xffffffff, 0x000d000c,
670         0x9254, 0xffffffff, 0x000f000e,
671         0x9258, 0xffffffff, 0x00100013,
672         0x925c, 0xffffffff, 0x00120011,
673         0x9260, 0xffffffff, 0x00150014,
674         0x9264, 0xffffffff, 0x000e000d,
675         0x9268, 0xffffffff, 0x0010000f,
676         0x926c, 0xffffffff, 0x00000013,
677         0x9270, 0xffffffff, 0x00120011,
678         0x9274, 0xffffffff, 0x00150014,
679         0x9278, 0xffffffff, 0x000f000e,
680         0x927c, 0xffffffff, 0x00110010,
681         0x9280, 0xffffffff, 0x00000014,
682         0x9284, 0xffffffff, 0x00130012,
683         0x9288, 0xffffffff, 0x00160015,
684         0x928c, 0xffffffff, 0x0010000f,
685         0x9290, 0xffffffff, 0x00120011,
686         0x9294, 0xffffffff, 0x00000015,
687         0x9298, 0xffffffff, 0x00140013,
688         0x929c, 0xffffffff, 0x00170016,
689         0x9150, 0xffffffff, 0x96940200,
690         0x8708, 0xffffffff, 0x00900100,
691         0xc478, 0xffffffff, 0x00000080,
692         0xc404, 0xffffffff, 0x0020003f,
693         0x30, 0xffffffff, 0x0000001c,
694         0x34, 0x000f0000, 0x000f0000,
695         0x160c, 0xffffffff, 0x00000100,
696         0x1024, 0xffffffff, 0x00000100,
697         0x102c, 0x00000101, 0x00000000,
698         0x20a8, 0xffffffff, 0x00000104,
699         0x264c, 0x000c0000, 0x000c0000,
700         0x2648, 0x000c0000, 0x000c0000,
701         0x55e4, 0xff000fff, 0x00000100,
702         0x55e8, 0x00000001, 0x00000001,
703         0x2f50, 0x00000001, 0x00000001,
704         0x30cc, 0xc0000fff, 0x00000104,
705         0xc1e4, 0x00000001, 0x00000001,
706         0xd0c0, 0xfffffff0, 0x00000100,
707         0xd8c0, 0xfffffff0, 0x00000100
708 };
709
710 static const u32 pitcairn_mgcg_cgcg_init[] =
711 {
712         0xc400, 0xffffffff, 0xfffffffc,
713         0x802c, 0xffffffff, 0xe0000000,
714         0x9a60, 0xffffffff, 0x00000100,
715         0x92a4, 0xffffffff, 0x00000100,
716         0xc164, 0xffffffff, 0x00000100,
717         0x9774, 0xffffffff, 0x00000100,
718         0x8984, 0xffffffff, 0x06000100,
719         0x8a18, 0xffffffff, 0x00000100,
720         0x92a0, 0xffffffff, 0x00000100,
721         0xc380, 0xffffffff, 0x00000100,
722         0x8b28, 0xffffffff, 0x00000100,
723         0x9144, 0xffffffff, 0x00000100,
724         0x8d88, 0xffffffff, 0x00000100,
725         0x8d8c, 0xffffffff, 0x00000100,
726         0x9030, 0xffffffff, 0x00000100,
727         0x9034, 0xffffffff, 0x00000100,
728         0x9038, 0xffffffff, 0x00000100,
729         0x903c, 0xffffffff, 0x00000100,
730         0xad80, 0xffffffff, 0x00000100,
731         0xac54, 0xffffffff, 0x00000100,
732         0x897c, 0xffffffff, 0x06000100,
733         0x9868, 0xffffffff, 0x00000100,
734         0x9510, 0xffffffff, 0x00000100,
735         0xaf04, 0xffffffff, 0x00000100,
736         0xae04, 0xffffffff, 0x00000100,
737         0x949c, 0xffffffff, 0x00000100,
738         0x802c, 0xffffffff, 0xe0000000,
739         0x9160, 0xffffffff, 0x00010000,
740         0x9164, 0xffffffff, 0x00030002,
741         0x9168, 0xffffffff, 0x00040007,
742         0x916c, 0xffffffff, 0x00060005,
743         0x9170, 0xffffffff, 0x00090008,
744         0x9174, 0xffffffff, 0x00020001,
745         0x9178, 0xffffffff, 0x00040003,
746         0x917c, 0xffffffff, 0x00000007,
747         0x9180, 0xffffffff, 0x00060005,
748         0x9184, 0xffffffff, 0x00090008,
749         0x9188, 0xffffffff, 0x00030002,
750         0x918c, 0xffffffff, 0x00050004,
751         0x9190, 0xffffffff, 0x00000008,
752         0x9194, 0xffffffff, 0x00070006,
753         0x9198, 0xffffffff, 0x000a0009,
754         0x919c, 0xffffffff, 0x00040003,
755         0x91a0, 0xffffffff, 0x00060005,
756         0x91a4, 0xffffffff, 0x00000009,
757         0x91a8, 0xffffffff, 0x00080007,
758         0x91ac, 0xffffffff, 0x000b000a,
759         0x91b0, 0xffffffff, 0x00050004,
760         0x91b4, 0xffffffff, 0x00070006,
761         0x91b8, 0xffffffff, 0x0008000b,
762         0x91bc, 0xffffffff, 0x000a0009,
763         0x91c0, 0xffffffff, 0x000d000c,
764         0x9200, 0xffffffff, 0x00090008,
765         0x9204, 0xffffffff, 0x000b000a,
766         0x9208, 0xffffffff, 0x000c000f,
767         0x920c, 0xffffffff, 0x000e000d,
768         0x9210, 0xffffffff, 0x00110010,
769         0x9214, 0xffffffff, 0x000a0009,
770         0x9218, 0xffffffff, 0x000c000b,
771         0x921c, 0xffffffff, 0x0000000f,
772         0x9220, 0xffffffff, 0x000e000d,
773         0x9224, 0xffffffff, 0x00110010,
774         0x9228, 0xffffffff, 0x000b000a,
775         0x922c, 0xffffffff, 0x000d000c,
776         0x9230, 0xffffffff, 0x00000010,
777         0x9234, 0xffffffff, 0x000f000e,
778         0x9238, 0xffffffff, 0x00120011,
779         0x923c, 0xffffffff, 0x000c000b,
780         0x9240, 0xffffffff, 0x000e000d,
781         0x9244, 0xffffffff, 0x00000011,
782         0x9248, 0xffffffff, 0x0010000f,
783         0x924c, 0xffffffff, 0x00130012,
784         0x9250, 0xffffffff, 0x000d000c,
785         0x9254, 0xffffffff, 0x000f000e,
786         0x9258, 0xffffffff, 0x00100013,
787         0x925c, 0xffffffff, 0x00120011,
788         0x9260, 0xffffffff, 0x00150014,
789         0x9150, 0xffffffff, 0x96940200,
790         0x8708, 0xffffffff, 0x00900100,
791         0xc478, 0xffffffff, 0x00000080,
792         0xc404, 0xffffffff, 0x0020003f,
793         0x30, 0xffffffff, 0x0000001c,
794         0x34, 0x000f0000, 0x000f0000,
795         0x160c, 0xffffffff, 0x00000100,
796         0x1024, 0xffffffff, 0x00000100,
797         0x102c, 0x00000101, 0x00000000,
798         0x20a8, 0xffffffff, 0x00000104,
799         0x55e4, 0xff000fff, 0x00000100,
800         0x55e8, 0x00000001, 0x00000001,
801         0x2f50, 0x00000001, 0x00000001,
802         0x30cc, 0xc0000fff, 0x00000104,
803         0xc1e4, 0x00000001, 0x00000001,
804         0xd0c0, 0xfffffff0, 0x00000100,
805         0xd8c0, 0xfffffff0, 0x00000100
806 };
807
/* Verde MGCG/CGCG (clock gating) golden-register table.
 * Flat triplets — presumably {reg offset, and-mask, value} — consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
907
/* Oland MGCG/CGCG (clock gating) golden-register table.
 * Flat triplets — presumably {reg offset, and-mask, value} — consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
987
/* Hainan MGCG/CGCG (clock gating) golden-register table.
 * Flat triplets — presumably {reg offset, and-mask, value} — consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 * NOTE(review): unlike the Verde/Oland tables this one omits the 0x102c,
 * 0x55e4 and 0x55e8 entries — looks intentional per-ASIC, but unverified here.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1064
1065 static u32 verde_pg_init[] =
1066 {
1067         0x353c, 0xffffffff, 0x40000,
1068         0x3538, 0xffffffff, 0x200010ff,
1069         0x353c, 0xffffffff, 0x0,
1070         0x353c, 0xffffffff, 0x0,
1071         0x353c, 0xffffffff, 0x0,
1072         0x353c, 0xffffffff, 0x0,
1073         0x353c, 0xffffffff, 0x0,
1074         0x353c, 0xffffffff, 0x7007,
1075         0x3538, 0xffffffff, 0x300010ff,
1076         0x353c, 0xffffffff, 0x0,
1077         0x353c, 0xffffffff, 0x0,
1078         0x353c, 0xffffffff, 0x0,
1079         0x353c, 0xffffffff, 0x0,
1080         0x353c, 0xffffffff, 0x0,
1081         0x353c, 0xffffffff, 0x400000,
1082         0x3538, 0xffffffff, 0x100010ff,
1083         0x353c, 0xffffffff, 0x0,
1084         0x353c, 0xffffffff, 0x0,
1085         0x353c, 0xffffffff, 0x0,
1086         0x353c, 0xffffffff, 0x0,
1087         0x353c, 0xffffffff, 0x0,
1088         0x353c, 0xffffffff, 0x120200,
1089         0x3538, 0xffffffff, 0x500010ff,
1090         0x353c, 0xffffffff, 0x0,
1091         0x353c, 0xffffffff, 0x0,
1092         0x353c, 0xffffffff, 0x0,
1093         0x353c, 0xffffffff, 0x0,
1094         0x353c, 0xffffffff, 0x0,
1095         0x353c, 0xffffffff, 0x1e1e16,
1096         0x3538, 0xffffffff, 0x600010ff,
1097         0x353c, 0xffffffff, 0x0,
1098         0x353c, 0xffffffff, 0x0,
1099         0x353c, 0xffffffff, 0x0,
1100         0x353c, 0xffffffff, 0x0,
1101         0x353c, 0xffffffff, 0x0,
1102         0x353c, 0xffffffff, 0x171f1e,
1103         0x3538, 0xffffffff, 0x700010ff,
1104         0x353c, 0xffffffff, 0x0,
1105         0x353c, 0xffffffff, 0x0,
1106         0x353c, 0xffffffff, 0x0,
1107         0x353c, 0xffffffff, 0x0,
1108         0x353c, 0xffffffff, 0x0,
1109         0x353c, 0xffffffff, 0x0,
1110         0x3538, 0xffffffff, 0x9ff,
1111         0x3500, 0xffffffff, 0x0,
1112         0x3504, 0xffffffff, 0x10000800,
1113         0x3504, 0xffffffff, 0xf,
1114         0x3504, 0xffffffff, 0xf,
1115         0x3500, 0xffffffff, 0x4,
1116         0x3504, 0xffffffff, 0x1000051e,
1117         0x3504, 0xffffffff, 0xffff,
1118         0x3504, 0xffffffff, 0xffff,
1119         0x3500, 0xffffffff, 0x8,
1120         0x3504, 0xffffffff, 0x80500,
1121         0x3500, 0xffffffff, 0x12,
1122         0x3504, 0xffffffff, 0x9050c,
1123         0x3500, 0xffffffff, 0x1d,
1124         0x3504, 0xffffffff, 0xb052c,
1125         0x3500, 0xffffffff, 0x2a,
1126         0x3504, 0xffffffff, 0x1053e,
1127         0x3500, 0xffffffff, 0x2d,
1128         0x3504, 0xffffffff, 0x10546,
1129         0x3500, 0xffffffff, 0x30,
1130         0x3504, 0xffffffff, 0xa054e,
1131         0x3500, 0xffffffff, 0x3c,
1132         0x3504, 0xffffffff, 0x1055f,
1133         0x3500, 0xffffffff, 0x3f,
1134         0x3504, 0xffffffff, 0x10567,
1135         0x3500, 0xffffffff, 0x42,
1136         0x3504, 0xffffffff, 0x1056f,
1137         0x3500, 0xffffffff, 0x45,
1138         0x3504, 0xffffffff, 0x10572,
1139         0x3500, 0xffffffff, 0x48,
1140         0x3504, 0xffffffff, 0x20575,
1141         0x3500, 0xffffffff, 0x4c,
1142         0x3504, 0xffffffff, 0x190801,
1143         0x3500, 0xffffffff, 0x67,
1144         0x3504, 0xffffffff, 0x1082a,
1145         0x3500, 0xffffffff, 0x6a,
1146         0x3504, 0xffffffff, 0x1b082d,
1147         0x3500, 0xffffffff, 0x87,
1148         0x3504, 0xffffffff, 0x310851,
1149         0x3500, 0xffffffff, 0xba,
1150         0x3504, 0xffffffff, 0x891,
1151         0x3500, 0xffffffff, 0xbc,
1152         0x3504, 0xffffffff, 0x893,
1153         0x3500, 0xffffffff, 0xbe,
1154         0x3504, 0xffffffff, 0x20895,
1155         0x3500, 0xffffffff, 0xc2,
1156         0x3504, 0xffffffff, 0x20899,
1157         0x3500, 0xffffffff, 0xc6,
1158         0x3504, 0xffffffff, 0x2089d,
1159         0x3500, 0xffffffff, 0xca,
1160         0x3504, 0xffffffff, 0x8a1,
1161         0x3500, 0xffffffff, 0xcc,
1162         0x3504, 0xffffffff, 0x8a3,
1163         0x3500, 0xffffffff, 0xce,
1164         0x3504, 0xffffffff, 0x308a5,
1165         0x3500, 0xffffffff, 0xd3,
1166         0x3504, 0xffffffff, 0x6d08cd,
1167         0x3500, 0xffffffff, 0x142,
1168         0x3504, 0xffffffff, 0x2000095a,
1169         0x3504, 0xffffffff, 0x1,
1170         0x3500, 0xffffffff, 0x144,
1171         0x3504, 0xffffffff, 0x301f095b,
1172         0x3500, 0xffffffff, 0x165,
1173         0x3504, 0xffffffff, 0xc094d,
1174         0x3500, 0xffffffff, 0x173,
1175         0x3504, 0xffffffff, 0xf096d,
1176         0x3500, 0xffffffff, 0x184,
1177         0x3504, 0xffffffff, 0x15097f,
1178         0x3500, 0xffffffff, 0x19b,
1179         0x3504, 0xffffffff, 0xc0998,
1180         0x3500, 0xffffffff, 0x1a9,
1181         0x3504, 0xffffffff, 0x409a7,
1182         0x3500, 0xffffffff, 0x1af,
1183         0x3504, 0xffffffff, 0xcdc,
1184         0x3500, 0xffffffff, 0x1b1,
1185         0x3504, 0xffffffff, 0x800,
1186         0x3508, 0xffffffff, 0x6c9b2000,
1187         0x3510, 0xfc00, 0x2000,
1188         0x3544, 0xffffffff, 0xfc0,
1189         0x28d4, 0x00000100, 0x100
1190 };
1191
1192 static void si_init_golden_registers(struct radeon_device *rdev)
1193 {
1194         switch (rdev->family) {
1195         case CHIP_TAHITI:
1196                 radeon_program_register_sequence(rdev,
1197                                                  tahiti_golden_registers,
1198                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1199                 radeon_program_register_sequence(rdev,
1200                                                  tahiti_golden_rlc_registers,
1201                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1202                 radeon_program_register_sequence(rdev,
1203                                                  tahiti_mgcg_cgcg_init,
1204                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1205                 radeon_program_register_sequence(rdev,
1206                                                  tahiti_golden_registers2,
1207                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1208                 break;
1209         case CHIP_PITCAIRN:
1210                 radeon_program_register_sequence(rdev,
1211                                                  pitcairn_golden_registers,
1212                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1213                 radeon_program_register_sequence(rdev,
1214                                                  pitcairn_golden_rlc_registers,
1215                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1216                 radeon_program_register_sequence(rdev,
1217                                                  pitcairn_mgcg_cgcg_init,
1218                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1219                 break;
1220         case CHIP_VERDE:
1221                 radeon_program_register_sequence(rdev,
1222                                                  verde_golden_registers,
1223                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1224                 radeon_program_register_sequence(rdev,
1225                                                  verde_golden_rlc_registers,
1226                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1227                 radeon_program_register_sequence(rdev,
1228                                                  verde_mgcg_cgcg_init,
1229                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1230                 radeon_program_register_sequence(rdev,
1231                                                  verde_pg_init,
1232                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1233                 break;
1234         case CHIP_OLAND:
1235                 radeon_program_register_sequence(rdev,
1236                                                  oland_golden_registers,
1237                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1238                 radeon_program_register_sequence(rdev,
1239                                                  oland_golden_rlc_registers,
1240                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1241                 radeon_program_register_sequence(rdev,
1242                                                  oland_mgcg_cgcg_init,
1243                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1244                 break;
1245         case CHIP_HAINAN:
1246                 radeon_program_register_sequence(rdev,
1247                                                  hainan_golden_registers,
1248                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1249                 radeon_program_register_sequence(rdev,
1250                                                  hainan_golden_registers2,
1251                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1252                 radeon_program_register_sequence(rdev,
1253                                                  hainan_mgcg_cgcg_init,
1254                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1255                 break;
1256         default:
1257                 break;
1258         }
1259 }
1260
1261 /**
1262  * si_get_allowed_info_register - fetch the register for the info ioctl
1263  *
1264  * @rdev: radeon_device pointer
1265  * @reg: register offset in bytes
1266  * @val: register value
1267  *
1268  * Returns 0 for success or -EINVAL for an invalid register
1269  *
1270  */
1271 int si_get_allowed_info_register(struct radeon_device *rdev,
1272                                  u32 reg, u32 *val)
1273 {
1274         switch (reg) {
1275         case GRBM_STATUS:
1276         case GRBM_STATUS2:
1277         case GRBM_STATUS_SE0:
1278         case GRBM_STATUS_SE1:
1279         case SRBM_STATUS:
1280         case SRBM_STATUS2:
1281         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1282         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1283         case UVD_STATUS:
1284                 *val = RREG32(reg);
1285                 return 0;
1286         default:
1287                 return -EINVAL;
1288         }
1289 }
1290
1291 #define PCIE_BUS_CLK                10000
1292 #define TCLK                        (PCIE_BUS_CLK / 10)
1293
1294 /**
1295  * si_get_xclk - get the xclk
1296  *
1297  * @rdev: radeon_device pointer
1298  *
1299  * Returns the reference clock used by the gfx engine
1300  * (SI).
1301  */
1302 u32 si_get_xclk(struct radeon_device *rdev)
1303 {
1304         u32 reference_clock = rdev->clock.spll.reference_freq;
1305         u32 tmp;
1306
1307         tmp = RREG32(CG_CLKPIN_CNTL_2);
1308         if (tmp & MUX_TCLK_TO_XCLK)
1309                 return TCLK;
1310
1311         tmp = RREG32(CG_CLKPIN_CNTL);
1312         if (tmp & XTALIN_DIVIDE)
1313                 return reference_clock / 4;
1314
1315         return reference_clock;
1316 }
1317
1318 /* get temperature in millidegrees */
1319 int si_get_temp(struct radeon_device *rdev)
1320 {
1321         u32 temp;
1322         int actual_temp = 0;
1323
1324         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1325                 CTF_TEMP_SHIFT;
1326
1327         if (temp & 0x200)
1328                 actual_temp = 255;
1329         else
1330                 actual_temp = temp & 0x1ff;
1331
1332         actual_temp = (actual_temp * 1000);
1333
1334         return actual_temp;
1335 }
1336
/* Number of {index, data} pairs in each per-ASIC io_mc table below. */
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC-IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by si_mc_load_microcode() before
 * the MC ucode is loaded.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1377
/* Pitcairn MC-IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1416
/* Verde MC-IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1455
/* Oland MC-IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1494
/* Hainan MC-IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by si_mc_load_microcode().
 * Identical to the Tahiti table except for the final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1533
1534 /* ucode loading */
1535 int si_mc_load_microcode(struct radeon_device *rdev)
1536 {
1537         const __be32 *fw_data = NULL;
1538         const __le32 *new_fw_data = NULL;
1539         u32 running;
1540         u32 *io_mc_regs = NULL;
1541         const __le32 *new_io_mc_regs = NULL;
1542         int i, regs_size, ucode_size;
1543
1544         if (!rdev->mc_fw)
1545                 return -EINVAL;
1546
1547         if (rdev->new_fw) {
1548                 const struct mc_firmware_header_v1_0 *hdr =
1549                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1550
1551                 radeon_ucode_print_mc_hdr(&hdr->header);
1552                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1553                 new_io_mc_regs = (const __le32 *)
1554                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1555                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1556                 new_fw_data = (const __le32 *)
1557                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1558         } else {
1559                 ucode_size = rdev->mc_fw->datasize / 4;
1560
1561                 switch (rdev->family) {
1562                 case CHIP_TAHITI:
1563                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1564                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1565                         break;
1566                 case CHIP_PITCAIRN:
1567                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1568                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1569                         break;
1570                 case CHIP_VERDE:
1571                 default:
1572                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1573                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1574                         break;
1575                 case CHIP_OLAND:
1576                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1577                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1578                         break;
1579                 case CHIP_HAINAN:
1580                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1581                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1582                         break;
1583                 }
1584                 fw_data = (const __be32 *)rdev->mc_fw->data;
1585         }
1586
1587         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1588
1589         if (running == 0) {
1590                 /* reset the engine and set to writable */
1591                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1592                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1593
1594                 /* load mc io regs */
1595                 for (i = 0; i < regs_size; i++) {
1596                         if (rdev->new_fw) {
1597                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1598                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1599                         } else {
1600                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1601                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1602                         }
1603                 }
1604                 /* load the MC ucode */
1605                 for (i = 0; i < ucode_size; i++) {
1606                         if (rdev->new_fw)
1607                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1608                         else
1609                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1610                 }
1611
1612                 /* put the engine back into the active state */
1613                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1614                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1615                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1616
1617                 /* wait for training to complete */
1618                 for (i = 0; i < rdev->usec_timeout; i++) {
1619                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1620                                 break;
1621                         udelay(1);
1622                 }
1623                 for (i = 0; i < rdev->usec_timeout; i++) {
1624                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1625                                 break;
1626                         udelay(1);
1627                 }
1628         }
1629
1630         return 0;
1631 }
1632
1633 static int si_init_microcode(struct radeon_device *rdev)
1634 {
1635         const char *chip_name;
1636         const char *new_chip_name;
1637         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1638         size_t smc_req_size, mc2_req_size;
1639         char fw_name[30];
1640         int err;
1641         int new_fw = 0;
1642         bool new_smc = false;
1643
1644         DRM_DEBUG("\n");
1645
1646         switch (rdev->family) {
1647         case CHIP_TAHITI:
1648                 chip_name = "TAHITI";
1649                 /* XXX: figure out which Tahitis need the new ucode */
1650                 if (0)
1651                         new_smc = true;
1652                 new_chip_name = "tahiti";
1653                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1654                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1655                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1656                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1657                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1658                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1659                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1660                 break;
1661         case CHIP_PITCAIRN:
1662                 chip_name = "PITCAIRN";
1663                 if ((rdev->pdev->revision == 0x81) ||
1664                     (rdev->pdev->device == 0x6810) ||
1665                     (rdev->pdev->device == 0x6811) ||
1666                     (rdev->pdev->device == 0x6816) ||
1667                     (rdev->pdev->device == 0x6817) ||
1668                     (rdev->pdev->device == 0x6806))
1669                         new_smc = true;
1670                 new_chip_name = "pitcairn";
1671                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1672                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1673                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1674                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1675                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1676                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1677                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1678                 break;
1679         case CHIP_VERDE:
1680                 chip_name = "VERDE";
1681                 if ((rdev->pdev->revision == 0x81) ||
1682                     (rdev->pdev->revision == 0x83) ||
1683                     (rdev->pdev->revision == 0x87) ||
1684                     (rdev->pdev->device == 0x6820) ||
1685                     (rdev->pdev->device == 0x6821) ||
1686                     (rdev->pdev->device == 0x6822) ||
1687                     (rdev->pdev->device == 0x6823) ||
1688                     (rdev->pdev->device == 0x682A) ||
1689                     (rdev->pdev->device == 0x682B))
1690                         new_smc = true;
1691                 new_chip_name = "verde";
1692                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1693                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1694                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1695                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1696                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1697                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1698                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1699                 break;
1700         case CHIP_OLAND:
1701                 chip_name = "OLAND";
1702                 if ((rdev->pdev->revision == 0xC7) ||
1703                     (rdev->pdev->revision == 0x80) ||
1704                     (rdev->pdev->revision == 0x81) ||
1705                     (rdev->pdev->revision == 0x83) ||
1706                     (rdev->pdev->device == 0x6604) ||
1707                     (rdev->pdev->device == 0x6605))
1708                         new_smc = true;
1709                 new_chip_name = "oland";
1710                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1711                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1712                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1713                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1714                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1715                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1716                 break;
1717         case CHIP_HAINAN:
1718                 chip_name = "HAINAN";
1719                 if ((rdev->pdev->revision == 0x81) ||
1720                     (rdev->pdev->revision == 0x83) ||
1721                     (rdev->pdev->revision == 0xC3) ||
1722                     (rdev->pdev->device == 0x6664) ||
1723                     (rdev->pdev->device == 0x6665) ||
1724                     (rdev->pdev->device == 0x6667))
1725                         new_smc = true;
1726                 new_chip_name = "hainan";
1727                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1728                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1729                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1730                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1731                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1732                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1733                 break;
1734         default: BUG();
1735         }
1736
1737         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1738
1739         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1740         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1741         if (err) {
1742                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1743                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1744                 if (err)
1745                         goto out;
1746                 if (rdev->pfp_fw->datasize != pfp_req_size) {
1747                         printk(KERN_ERR
1748                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749                                rdev->pfp_fw->datasize, fw_name);
1750                         err = -EINVAL;
1751                         goto out;
1752                 }
1753         } else {
1754                 err = radeon_ucode_validate(rdev->pfp_fw);
1755                 if (err) {
1756                         printk(KERN_ERR
1757                                "si_cp: validation failed for firmware \"%s\"\n",
1758                                fw_name);
1759                         goto out;
1760                 } else {
1761                         new_fw++;
1762                 }
1763         }
1764
1765         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1766         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1767         if (err) {
1768                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1769                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1770                 if (err)
1771                         goto out;
1772                 if (rdev->me_fw->datasize != me_req_size) {
1773                         printk(KERN_ERR
1774                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1775                                rdev->me_fw->datasize, fw_name);
1776                         err = -EINVAL;
1777                 }
1778         } else {
1779                 err = radeon_ucode_validate(rdev->me_fw);
1780                 if (err) {
1781                         printk(KERN_ERR
1782                                "si_cp: validation failed for firmware \"%s\"\n",
1783                                fw_name);
1784                         goto out;
1785                 } else {
1786                         new_fw++;
1787                 }
1788         }
1789
1790         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1791         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1792         if (err) {
1793                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1794                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1795                 if (err)
1796                         goto out;
1797                 if (rdev->ce_fw->datasize != ce_req_size) {
1798                         printk(KERN_ERR
1799                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1800                                rdev->ce_fw->datasize, fw_name);
1801                         err = -EINVAL;
1802                 }
1803         } else {
1804                 err = radeon_ucode_validate(rdev->ce_fw);
1805                 if (err) {
1806                         printk(KERN_ERR
1807                                "si_cp: validation failed for firmware \"%s\"\n",
1808                                fw_name);
1809                         goto out;
1810                 } else {
1811                         new_fw++;
1812                 }
1813         }
1814
1815         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1816         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1817         if (err) {
1818                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1819                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1820                 if (err)
1821                         goto out;
1822                 if (rdev->rlc_fw->datasize != rlc_req_size) {
1823                         printk(KERN_ERR
1824                                "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1825                                rdev->rlc_fw->datasize, fw_name);
1826                         err = -EINVAL;
1827                 }
1828         } else {
1829                 err = radeon_ucode_validate(rdev->rlc_fw);
1830                 if (err) {
1831                         printk(KERN_ERR
1832                                "si_cp: validation failed for firmware \"%s\"\n",
1833                                fw_name);
1834                         goto out;
1835                 } else {
1836                         new_fw++;
1837                 }
1838         }
1839
1840         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1841         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1842         if (err) {
1843                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1844                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1845                 if (err) {
1846                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1847                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1848                         if (err)
1849                                 goto out;
1850                 }
1851                 if ((rdev->mc_fw->datasize != mc_req_size) &&
1852                     (rdev->mc_fw->datasize != mc2_req_size)) {
1853                         printk(KERN_ERR
1854                                "si_mc: Bogus length %zu in firmware \"%s\"\n",
1855                                rdev->mc_fw->datasize, fw_name);
1856                         err = -EINVAL;
1857                 }
1858                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1859         } else {
1860                 err = radeon_ucode_validate(rdev->mc_fw);
1861                 if (err) {
1862                         printk(KERN_ERR
1863                                "si_cp: validation failed for firmware \"%s\"\n",
1864                                fw_name);
1865                         goto out;
1866                 } else {
1867                         new_fw++;
1868                 }
1869         }
1870
1871         if (new_smc)
1872                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_k_smc", new_chip_name);
1873         else
1874                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1875         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1876         if (err) {
1877                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1878                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1879                 if (err) {
1880                         printk(KERN_ERR
1881                                "smc: error loading firmware \"%s\"\n",
1882                                fw_name);
1883                         release_firmware(rdev->smc_fw);
1884                         rdev->smc_fw = NULL;
1885                         err = 0;
1886                 } else if (rdev->smc_fw->datasize != smc_req_size) {
1887                         printk(KERN_ERR
1888                                "si_smc: Bogus length %zu in firmware \"%s\"\n",
1889                                rdev->smc_fw->datasize, fw_name);
1890                         err = -EINVAL;
1891                 }
1892         } else {
1893                 err = radeon_ucode_validate(rdev->smc_fw);
1894                 if (err) {
1895                         printk(KERN_ERR
1896                                "si_cp: validation failed for firmware \"%s\"\n",
1897                                fw_name);
1898                         goto out;
1899                 } else {
1900                         new_fw++;
1901                 }
1902         }
1903
1904         if (new_fw == 0) {
1905                 rdev->new_fw = false;
1906         } else if (new_fw < 6) {
1907                 printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1908                 err = -EINVAL;
1909         } else {
1910                 rdev->new_fw = true;
1911         }
1912 out:
1913         if (err) {
1914                 if (err != -EINVAL)
1915                         printk(KERN_ERR
1916                                "si_cp: Failed to load firmware \"%s\"\n",
1917                                fw_name);
1918                 release_firmware(rdev->pfp_fw);
1919                 rdev->pfp_fw = NULL;
1920                 release_firmware(rdev->me_fw);
1921                 rdev->me_fw = NULL;
1922                 release_firmware(rdev->ce_fw);
1923                 rdev->ce_fw = NULL;
1924                 release_firmware(rdev->rlc_fw);
1925                 rdev->rlc_fw = NULL;
1926                 release_firmware(rdev->mc_fw);
1927                 rdev->mc_fw = NULL;
1928                 release_firmware(rdev->smc_fw);
1929                 rdev->smc_fw = NULL;
1930         }
1931         return err;
1932 }
1933
1934 /**
1935  * si_fini_microcode - drop the firmwares image references
1936  *
1937  * @rdev: radeon_device pointer
1938  *
1939  * Drop the pfp, me, rlc, mc and ce firmware image references.
1940  * Called at driver shutdown.
1941  */
1942 static void si_fini_microcode(struct radeon_device *rdev)
1943 {
1944         release_firmware(rdev->pfp_fw);
1945         rdev->pfp_fw = NULL;
1946         release_firmware(rdev->me_fw);
1947         rdev->me_fw = NULL;
1948         release_firmware(rdev->rlc_fw);
1949         rdev->rlc_fw = NULL;
1950         release_firmware(rdev->mc_fw);
1951         rdev->mc_fw = NULL;
1952         release_firmware(rdev->smc_fw);
1953         rdev->smc_fw = NULL;
1954         release_firmware(rdev->ce_fw);
1955         rdev->ce_fw = NULL;
1956 }
1957
1958 /* watermark setup */
1959 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1960                                    struct radeon_crtc *radeon_crtc,
1961                                    struct drm_display_mode *mode,
1962                                    struct drm_display_mode *other_mode)
1963 {
1964         u32 tmp, buffer_alloc, i;
1965         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1966         /*
1967          * Line Buffer Setup
1968          * There are 3 line buffers, each one shared by 2 display controllers.
1969          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1970          * the display controllers.  The paritioning is done via one of four
1971          * preset allocations specified in bits 21:20:
1972          *  0 - half lb
1973          *  2 - whole lb, other crtc must be disabled
1974          */
1975         /* this can get tricky if we have two large displays on a paired group
1976          * of crtcs.  Ideally for multiple large displays we'd assign them to
1977          * non-linked crtcs for maximum line buffer allocation.
1978          */
1979         if (radeon_crtc->base.enabled && mode) {
1980                 if (other_mode) {
1981                         tmp = 0; /* 1/2 */
1982                         buffer_alloc = 1;
1983                 } else {
1984                         tmp = 2; /* whole */
1985                         buffer_alloc = 2;
1986                 }
1987         } else {
1988                 tmp = 0;
1989                 buffer_alloc = 0;
1990         }
1991
1992         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1993                DC_LB_MEMORY_CONFIG(tmp));
1994
1995         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1996                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1997         for (i = 0; i < rdev->usec_timeout; i++) {
1998                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1999                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
2000                         break;
2001                 udelay(1);
2002         }
2003
2004         if (radeon_crtc->base.enabled && mode) {
2005                 switch (tmp) {
2006                 case 0:
2007                 default:
2008                         return 4096 * 2;
2009                 case 2:
2010                         return 8192 * 2;
2011                 }
2012         }
2013
2014         /* controller not enabled, so no lb used */
2015         return 0;
2016 }
2017
2018 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2019 {
2020         u32 tmp = RREG32(MC_SHARED_CHMAP);
2021
2022         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2023         case 0:
2024         default:
2025                 return 1;
2026         case 1:
2027                 return 2;
2028         case 2:
2029                 return 4;
2030         case 3:
2031                 return 8;
2032         case 4:
2033                 return 3;
2034         case 5:
2035                 return 6;
2036         case 6:
2037                 return 10;
2038         case 7:
2039                 return 12;
2040         case 8:
2041                 return 16;
2042         }
2043 }
2044
/* per-head input parameters for the DCE6 display watermark calculations */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
2060
2061 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2062 {
2063         /* Calculate raw DRAM Bandwidth */
2064         fixed20_12 dram_efficiency; /* 0.7 */
2065         fixed20_12 yclk, dram_channels, bandwidth;
2066         fixed20_12 a;
2067
2068         a.full = dfixed_const(1000);
2069         yclk.full = dfixed_const(wm->yclk);
2070         yclk.full = dfixed_div(yclk, a);
2071         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2072         a.full = dfixed_const(10);
2073         dram_efficiency.full = dfixed_const(7);
2074         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2075         bandwidth.full = dfixed_mul(dram_channels, yclk);
2076         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2077
2078         return dfixed_trunc(bandwidth);
2079 }
2080
2081 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2082 {
2083         /* Calculate DRAM Bandwidth and the part allocated to display. */
2084         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2085         fixed20_12 yclk, dram_channels, bandwidth;
2086         fixed20_12 a;
2087
2088         a.full = dfixed_const(1000);
2089         yclk.full = dfixed_const(wm->yclk);
2090         yclk.full = dfixed_div(yclk, a);
2091         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2092         a.full = dfixed_const(10);
2093         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2094         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2095         bandwidth.full = dfixed_mul(dram_channels, yclk);
2096         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2097
2098         return dfixed_trunc(bandwidth);
2099 }
2100
2101 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2102 {
2103         /* Calculate the display Data return Bandwidth */
2104         fixed20_12 return_efficiency; /* 0.8 */
2105         fixed20_12 sclk, bandwidth;
2106         fixed20_12 a;
2107
2108         a.full = dfixed_const(1000);
2109         sclk.full = dfixed_const(wm->sclk);
2110         sclk.full = dfixed_div(sclk, a);
2111         a.full = dfixed_const(10);
2112         return_efficiency.full = dfixed_const(8);
2113         return_efficiency.full = dfixed_div(return_efficiency, a);
2114         a.full = dfixed_const(32);
2115         bandwidth.full = dfixed_mul(a, sclk);
2116         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2117
2118         return dfixed_trunc(bandwidth);
2119 }
2120
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	/* DMIF requests are a fixed 32 bytes on DCE6 */
	return 32;
}
2125
2126 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2127 {
2128         /* Calculate the DMIF Request Bandwidth */
2129         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2130         fixed20_12 disp_clk, sclk, bandwidth;
2131         fixed20_12 a, b1, b2;
2132         u32 min_bandwidth;
2133
2134         a.full = dfixed_const(1000);
2135         disp_clk.full = dfixed_const(wm->disp_clk);
2136         disp_clk.full = dfixed_div(disp_clk, a);
2137         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2138         b1.full = dfixed_mul(a, disp_clk);
2139
2140         a.full = dfixed_const(1000);
2141         sclk.full = dfixed_const(wm->sclk);
2142         sclk.full = dfixed_div(sclk, a);
2143         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2144         b2.full = dfixed_mul(a, sclk);
2145
2146         a.full = dfixed_const(10);
2147         disp_clk_request_efficiency.full = dfixed_const(8);
2148         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2149
2150         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2151
2152         a.full = dfixed_const(min_bandwidth);
2153         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2154
2155         return dfixed_trunc(bandwidth);
2156 }
2157
2158 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2159 {
2160         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2161         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2162         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2163         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2164
2165         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2166 }
2167
2168 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2169 {
2170         /* Calculate the display mode Average Bandwidth
2171          * DisplayMode should contain the source and destination dimensions,
2172          * timing, etc.
2173          */
2174         fixed20_12 bpp;
2175         fixed20_12 line_time;
2176         fixed20_12 src_width;
2177         fixed20_12 bandwidth;
2178         fixed20_12 a;
2179
2180         a.full = dfixed_const(1000);
2181         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2182         line_time.full = dfixed_div(line_time, a);
2183         bpp.full = dfixed_const(wm->bytes_per_pixel);
2184         src_width.full = dfixed_const(wm->src_width);
2185         bandwidth.full = dfixed_mul(src_width, bpp);
2186         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2187         bandwidth.full = dfixed_div(bandwidth, line_time);
2188
2189         return dfixed_trunc(bandwidth);
2190 }
2191
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): the divisions below assume available_bandwidth and
	 * disp_clk are non-zero - TODO confirm callers guarantee this */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time spent waiting for the data requested by the other heads */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps or interlacing can require up
	 * to 4 source lines per destination line; otherwise 2 suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif buffer drain rate bound: dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	/* tmp = the tighter of the two bounds above */
	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = bytes the display consumes per time unit: disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* rate at which the line buffer can actually be filled */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case source lines at that rate */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within the active period, the excess
	 * adds to the latency the watermark must cover */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2254
2255 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2256 {
2257         if (dce6_average_bandwidth(wm) <=
2258             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2259                 return true;
2260         else
2261                 return false;
2262 };
2263
2264 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2265 {
2266         if (dce6_average_bandwidth(wm) <=
2267             (dce6_available_bandwidth(wm) / wm->num_heads))
2268                 return true;
2269         else
2270                 return false;
2271 };
2272
2273 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2274 {
2275         u32 lb_partitions = wm->lb_size / wm->src_width;
2276         u32 line_time = wm->active_time + wm->blank_time;
2277         u32 latency_tolerant_lines;
2278         u32 latency_hiding;
2279         fixed20_12 a;
2280
2281         a.full = dfixed_const(1);
2282         if (wm->vsc.full > a.full)
2283                 latency_tolerant_lines = 1;
2284         else {
2285                 if (lb_partitions <= (wm->vtaps + 1))
2286                         latency_tolerant_lines = 1;
2287                 else
2288                         latency_tolerant_lines = 2;
2289         }
2290
2291         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2292
2293         if (dce6_latency_watermark(wm) <= latency_hiding)
2294                 return true;
2295         else
2296                 return false;
2297 }
2298
/**
 * dce6_program_watermarks - program display watermarks for one crtc
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer allocation for this crtc
 * @num_heads: number of enabled display heads
 *
 * Computes latency watermarks for the high-clock (wm A) and low-clock (wm B)
 * power states, optionally forces display request priority high when the
 * bandwidth/latency checks fail, and writes the watermark and priority-mark
 * registers.  Also records line_time and both watermarks on the crtc for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	/* priorities stay PRIORITY_OFF when the crtc is disabled */
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* assumes mode->clock is in kHz, giving pixel_period in ns
		 * — TODO confirm against drm_display_mode docs */
		pixel_period = 1000000 / (u32)mode->clock;
		/* clamped to 65535, presumably a 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* DPM enabled: use the highest (false = !low) dpm clocks */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* DPM enabled: use the lowest (true = low) dpm clocks */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel clock * hsc / 16,
		 * computed in 20.12 fixed point */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	/* the original watermark selection is saved in arb_control3 and
	 * restored below after both sets are programmed */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2461
2462 void dce6_bandwidth_update(struct radeon_device *rdev)
2463 {
2464         struct drm_display_mode *mode0 = NULL;
2465         struct drm_display_mode *mode1 = NULL;
2466         u32 num_heads = 0, lb_size;
2467         int i;
2468
2469         if (!rdev->mode_info.mode_config_initialized)
2470                 return;
2471
2472         radeon_update_display_priority(rdev);
2473
2474         for (i = 0; i < rdev->num_crtc; i++) {
2475                 if (rdev->mode_info.crtcs[i]->base.enabled)
2476                         num_heads++;
2477         }
2478         for (i = 0; i < rdev->num_crtc; i += 2) {
2479                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2480                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2481                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2482                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2483                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2484                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2485         }
2486 }
2487
2488 /*
2489  * Core functions
2490  */
2491 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2492 {
2493         u32 *tile = rdev->config.si.tile_mode_array;
2494         const u32 num_tile_mode_states =
2495                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2496         u32 reg_offset, split_equal_to_row_size;
2497
2498         switch (rdev->config.si.mem_row_size_in_kb) {
2499         case 1:
2500                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2501                 break;
2502         case 2:
2503         default:
2504                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2505                 break;
2506         case 4:
2507                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2508                 break;
2509         }
2510
2511         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2512                 tile[reg_offset] = 0;
2513
2514         switch(rdev->family) {
2515         case CHIP_TAHITI:
2516         case CHIP_PITCAIRN:
2517                 /* non-AA compressed depth or any compressed stencil */
2518                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2520                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2521                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2522                            NUM_BANKS(ADDR_SURF_16_BANK) |
2523                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2525                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2526                 /* 2xAA/4xAA compressed depth only */
2527                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2528                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2529                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2530                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2531                            NUM_BANKS(ADDR_SURF_16_BANK) |
2532                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2535                 /* 8xAA compressed depth only */
2536                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2540                            NUM_BANKS(ADDR_SURF_16_BANK) |
2541                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2544                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2545                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2548                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2549                            NUM_BANKS(ADDR_SURF_16_BANK) |
2550                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2554                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2556                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2557                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558                            NUM_BANKS(ADDR_SURF_16_BANK) |
2559                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2563                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566                            TILE_SPLIT(split_equal_to_row_size) |
2567                            NUM_BANKS(ADDR_SURF_16_BANK) |
2568                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2572                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2574                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2575                            TILE_SPLIT(split_equal_to_row_size) |
2576                            NUM_BANKS(ADDR_SURF_16_BANK) |
2577                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2580                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2581                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2583                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2584                            TILE_SPLIT(split_equal_to_row_size) |
2585                            NUM_BANKS(ADDR_SURF_16_BANK) |
2586                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2588                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589                 /* 1D and 1D Array Surfaces */
2590                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2591                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2592                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2593                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2594                            NUM_BANKS(ADDR_SURF_16_BANK) |
2595                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2598                 /* Displayable maps. */
2599                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2603                            NUM_BANKS(ADDR_SURF_16_BANK) |
2604                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2606                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2607                 /* Display 8bpp. */
2608                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2609                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2611                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2612                            NUM_BANKS(ADDR_SURF_16_BANK) |
2613                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2615                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2616                 /* Display 16bpp. */
2617                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2620                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2621                            NUM_BANKS(ADDR_SURF_16_BANK) |
2622                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2624                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2625                 /* Display 32bpp. */
2626                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2629                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2630                            NUM_BANKS(ADDR_SURF_16_BANK) |
2631                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2634                 /* Thin. */
2635                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2638                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2639                            NUM_BANKS(ADDR_SURF_16_BANK) |
2640                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2642                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643                 /* Thin 8 bpp. */
2644                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2647                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2648                            NUM_BANKS(ADDR_SURF_16_BANK) |
2649                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2651                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2652                 /* Thin 16 bpp. */
2653                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2657                            NUM_BANKS(ADDR_SURF_16_BANK) |
2658                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2660                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661                 /* Thin 32 bpp. */
2662                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2664                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2665                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2666                            NUM_BANKS(ADDR_SURF_16_BANK) |
2667                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2670                 /* Thin 64 bpp. */
2671                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2674                            TILE_SPLIT(split_equal_to_row_size) |
2675                            NUM_BANKS(ADDR_SURF_16_BANK) |
2676                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2679                 /* 8 bpp PRT. */
2680                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2684                            NUM_BANKS(ADDR_SURF_16_BANK) |
2685                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2686                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2687                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2688                 /* 16 bpp PRT */
2689                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2691                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2692                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2693                            NUM_BANKS(ADDR_SURF_16_BANK) |
2694                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2697                 /* 32 bpp PRT */
2698                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2700                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2701                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2702                            NUM_BANKS(ADDR_SURF_16_BANK) |
2703                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2705                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2706                 /* 64 bpp PRT */
2707                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2709                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2710                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2711                            NUM_BANKS(ADDR_SURF_16_BANK) |
2712                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2714                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2715                 /* 128 bpp PRT */
2716                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2718                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2719                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2720                            NUM_BANKS(ADDR_SURF_8_BANK) |
2721                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2724
2725                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2726                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2727                 break;
2728
2729         case CHIP_VERDE:
2730         case CHIP_OLAND:
2731         case CHIP_HAINAN:
2732                 /* non-AA compressed depth or any compressed stencil */
2733                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2735                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2737                            NUM_BANKS(ADDR_SURF_16_BANK) |
2738                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2741                 /* 2xAA/4xAA compressed depth only */
2742                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2744                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2746                            NUM_BANKS(ADDR_SURF_16_BANK) |
2747                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2750                 /* 8xAA compressed depth only */
2751                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2752                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2753                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2755                            NUM_BANKS(ADDR_SURF_16_BANK) |
2756                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2758                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2759                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2760                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2764                            NUM_BANKS(ADDR_SURF_16_BANK) |
2765                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2768                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2769                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2770                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2771                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2773                            NUM_BANKS(ADDR_SURF_16_BANK) |
2774                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2776                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2777                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2778                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2779                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2780                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2781                            TILE_SPLIT(split_equal_to_row_size) |
2782                            NUM_BANKS(ADDR_SURF_16_BANK) |
2783                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2785                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2786                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2787                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2789                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790                            TILE_SPLIT(split_equal_to_row_size) |
2791                            NUM_BANKS(ADDR_SURF_16_BANK) |
2792                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2795                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2796                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                            TILE_SPLIT(split_equal_to_row_size) |
2800                            NUM_BANKS(ADDR_SURF_16_BANK) |
2801                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2803                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2804                 /* 1D and 1D Array Surfaces */
2805                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2806                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809                            NUM_BANKS(ADDR_SURF_16_BANK) |
2810                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813                 /* Displayable maps. */
2814                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2816                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2818                            NUM_BANKS(ADDR_SURF_16_BANK) |
2819                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2821                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2822                 /* Display 8bpp. */
2823                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2827                            NUM_BANKS(ADDR_SURF_16_BANK) |
2828                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2831                 /* Display 16bpp. */
2832                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2835                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2836                            NUM_BANKS(ADDR_SURF_16_BANK) |
2837                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2839                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2840                 /* Display 32bpp. */
2841                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2844                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2845                            NUM_BANKS(ADDR_SURF_16_BANK) |
2846                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2849                 /* Thin. */
2850                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2851                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2852                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2853                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2854                            NUM_BANKS(ADDR_SURF_16_BANK) |
2855                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2857                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2858                 /* Thin 8 bpp. */
2859                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2861                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2862                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2863                            NUM_BANKS(ADDR_SURF_16_BANK) |
2864                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2867                 /* Thin 16 bpp. */
2868                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2870                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2871                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2872                            NUM_BANKS(ADDR_SURF_16_BANK) |
2873                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2875                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2876                 /* Thin 32 bpp. */
2877                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2879                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2880                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2881                            NUM_BANKS(ADDR_SURF_16_BANK) |
2882                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2885                 /* Thin 64 bpp. */
2886                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2888                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2889                            TILE_SPLIT(split_equal_to_row_size) |
2890                            NUM_BANKS(ADDR_SURF_16_BANK) |
2891                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2894                 /* 8 bpp PRT. */
2895                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2899                            NUM_BANKS(ADDR_SURF_16_BANK) |
2900                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2901                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2902                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2903                 /* 16 bpp PRT */
2904                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2908                            NUM_BANKS(ADDR_SURF_16_BANK) |
2909                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2910                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2912                 /* 32 bpp PRT */
2913                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2915                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2916                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2917                            NUM_BANKS(ADDR_SURF_16_BANK) |
2918                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2920                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2921                 /* 64 bpp PRT */
2922                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2923                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2924                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2925                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2926                            NUM_BANKS(ADDR_SURF_16_BANK) |
2927                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2930                 /* 128 bpp PRT */
2931                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2933                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2934                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2935                            NUM_BANKS(ADDR_SURF_8_BANK) |
2936                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2939
2940                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2941                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2942                 break;
2943
2944         default:
2945                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2946         }
2947 }
2948
2949 static void si_select_se_sh(struct radeon_device *rdev,
2950                             u32 se_num, u32 sh_num)
2951 {
2952         u32 data = INSTANCE_BROADCAST_WRITES;
2953
2954         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2955                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2956         else if (se_num == 0xffffffff)
2957                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2958         else if (sh_num == 0xffffffff)
2959                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2960         else
2961                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2962         WREG32(GRBM_GFX_INDEX, data);
2963 }
2964
2965 static u32 si_create_bitmask(u32 bit_width)
2966 {
2967         u32 i, mask = 0;
2968
2969         for (i = 0; i < bit_width; i++) {
2970                 mask <<= 1;
2971                 mask |= 1;
2972         }
2973         return mask;
2974 }
2975
2976 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2977 {
2978         u32 data, mask;
2979
2980         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2981         if (data & 1)
2982                 data &= INACTIVE_CUS_MASK;
2983         else
2984                 data = 0;
2985         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2986
2987         data >>= INACTIVE_CUS_SHIFT;
2988
2989         mask = si_create_bitmask(cu_per_sh);
2990
2991         return ~data & mask;
2992 }
2993
/*
 * Program SPI_STATIC_THREAD_MGMT_3 for every shader array: clear the
 * first active CU bit found in the register's mask, then move on to
 * the next SE/SH pair.  Leaves GRBM in broadcast mode on exit.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* direct the register accesses below at SE i, SH j */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				/*
				 * NOTE(review): "mask <<= k" probes bits
				 * 0, 1, 3, 6, 10, 15 rather than every
				 * consecutive bit; "mask = 1 << k" may have
				 * been intended.  Kept as-is because it
				 * matches the upstream driver -- confirm
				 * against hardware docs before changing.
				 */
				mask <<= k;
				if (active_cu & mask) {
					/* knock out the first active CU found */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* restore broadcast so later writes reach all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3020
3021 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3022                               u32 max_rb_num_per_se,
3023                               u32 sh_per_se)
3024 {
3025         u32 data, mask;
3026
3027         data = RREG32(CC_RB_BACKEND_DISABLE);
3028         if (data & 1)
3029                 data &= BACKEND_DISABLE_MASK;
3030         else
3031                 data = 0;
3032         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3033
3034         data >>= BACKEND_DISABLE_SHIFT;
3035
3036         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3037
3038         return data & mask;
3039 }
3040
/*
 * Gather the per-SE/SH disabled-RB masks, derive the global enabled-RB
 * bitmap (cached in rdev->config.si.backend_enable_mask), and program
 * PA_SC_RASTER_CONFIG for each shader engine accordingly.  Leaves GRBM
 * in broadcast mode on exit.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect the disabled-RB bits from every shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into an enabled-RB bitmap */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program the raster config per SE; enabled_rbs is consumed
	 * (shifted down two bits per SH) as we go */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* restore broadcast for subsequent register writes */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3090
/*
 * si_gpu_init - one-time gfx engine setup for Southern Islands ASICs
 *
 * Fills rdev->config.si with per-family shader/backend limits, derives
 * and programs the address-config (tiling) registers from the golden
 * GB_ADDR_CONFIG value, initializes the tiling tables, render backends
 * and SPI CU masks, and writes the 3D engine hardware defaults.  The
 * register write order below is the bring-up sequence - do not reorder.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader core configuration */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* unknown families fall back to the Verde configuration */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB, clamped to 4) from the
	 * memory controller's column-count field */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into every block that walks memory */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count active compute units across all SE/SH pairs */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* zero the CB performance counter selects */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the register writes settle */
	udelay(50);
}
3356
/*
 * GPU scratch register helper functions.
 */
3360 static void si_scratch_init(struct radeon_device *rdev)
3361 {
3362         int i;
3363
3364         rdev->scratch.num_reg = 7;
3365         rdev->scratch.reg_base = SCRATCH_REG0;
3366         for (i = 0; i < rdev->scratch.num_reg; i++) {
3367                 rdev->scratch.free[i] = true;
3368                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3369         }
3370 }
3371
/**
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a cache flush over GART followed by an EVENT_WRITE_EOP packet
 * that writes the fence sequence number to the fence GPU address and
 * raises an interrupt (DATA_SEL(1) | INT_SEL(2)).  The dword sequence
 * below is a fixed-size packet stream - do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* CP_COHER_SIZE: full range */
	radeon_ring_write(ring, 0);		/* CP_COHER_BASE */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3398
3399 /*
3400  * IB stuff
3401  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST) packet pointing
 * at the IB, tagged with the IB's VM id.  For non-const IBs the ring's
 * read pointer shadow is updated first and the GART read caches are
 * flushed afterwards.  The next_rptr offsets are hard-coded dword
 * counts of the packets emitted between here and the end of the IB
 * packet - keep them in sync with any packet changes.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));	/* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3458
3459 /*
3460  * CP.
3461  */
3462 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3463 {
3464         if (enable)
3465                 WREG32(CP_ME_CNTL, 0);
3466         else {
3467                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3468                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3469                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3470                 WREG32(SCRATCH_UMSK, 0);
3471                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3472                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3473                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3474         }
3475         udelay(50);
3476 }
3477
3478 static int si_cp_load_microcode(struct radeon_device *rdev)
3479 {
3480         int i;
3481
3482         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3483                 return -EINVAL;
3484
3485         si_cp_enable(rdev, false);
3486
3487         if (rdev->new_fw) {
3488                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3489                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3490                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3491                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3492                 const struct gfx_firmware_header_v1_0 *me_hdr =
3493                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3494                 const __le32 *fw_data;
3495                 u32 fw_size;
3496
3497                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3498                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3499                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3500
3501                 /* PFP */
3502                 fw_data = (const __le32 *)
3503                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3504                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3505                 WREG32(CP_PFP_UCODE_ADDR, 0);
3506                 for (i = 0; i < fw_size; i++)
3507                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3508                 WREG32(CP_PFP_UCODE_ADDR, 0);
3509
3510                 /* CE */
3511                 fw_data = (const __le32 *)
3512                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3513                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3514                 WREG32(CP_CE_UCODE_ADDR, 0);
3515                 for (i = 0; i < fw_size; i++)
3516                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3517                 WREG32(CP_CE_UCODE_ADDR, 0);
3518
3519                 /* ME */
3520                 fw_data = (const __be32 *)
3521                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3522                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3523                 WREG32(CP_ME_RAM_WADDR, 0);
3524                 for (i = 0; i < fw_size; i++)
3525                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3526                 WREG32(CP_ME_RAM_WADDR, 0);
3527         } else {
3528                 const __be32 *fw_data;
3529
3530                 /* PFP */
3531                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3532                 WREG32(CP_PFP_UCODE_ADDR, 0);
3533                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3534                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3535                 WREG32(CP_PFP_UCODE_ADDR, 0);
3536
3537                 /* CE */
3538                 fw_data = (const __be32 *)rdev->ce_fw->data;
3539                 WREG32(CP_CE_UCODE_ADDR, 0);
3540                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3541                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3542                 WREG32(CP_CE_UCODE_ADDR, 0);
3543
3544                 /* ME */
3545                 fw_data = (const __be32 *)rdev->me_fw->data;
3546                 WREG32(CP_ME_RAM_WADDR, 0);
3547                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3548                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3549                 WREG32(CP_ME_RAM_WADDR, 0);
3550         }
3551
3552         WREG32(CP_PFP_UCODE_ADDR, 0);
3553         WREG32(CP_CE_UCODE_ADDR, 0);
3554         WREG32(CP_ME_RAM_WADDR, 0);
3555         WREG32(CP_ME_RAM_RADDR, 0);
3556         return 0;
3557 }
3558
3559 static int si_cp_start(struct radeon_device *rdev)
3560 {
3561         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3562         int r, i;
3563
3564         r = radeon_ring_lock(rdev, ring, 7 + 4);
3565         if (r) {
3566                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3567                 return r;
3568         }
3569         /* init the CP */
3570         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3571         radeon_ring_write(ring, 0x1);
3572         radeon_ring_write(ring, 0x0);
3573         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3574         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3575         radeon_ring_write(ring, 0);
3576         radeon_ring_write(ring, 0);
3577
3578         /* init the CE partitions */
3579         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3580         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3581         radeon_ring_write(ring, 0xc000);
3582         radeon_ring_write(ring, 0xe000);
3583         radeon_ring_unlock_commit(rdev, ring, false);
3584
3585         si_cp_enable(rdev, true);
3586
3587         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3588         if (r) {
3589                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3590                 return r;
3591         }
3592
3593         /* setup clear context state */
3594         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3595         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3596
3597         for (i = 0; i < si_default_size; i++)
3598                 radeon_ring_write(ring, si_default_state[i]);
3599
3600         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3601         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3602
3603         /* set clear context state */
3604         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3605         radeon_ring_write(ring, 0);
3606
3607         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3608         radeon_ring_write(ring, 0x00000316);
3609         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3610         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3611
3612         radeon_ring_unlock_commit(rdev, ring, false);
3613
3614         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3615                 ring = &rdev->ring[i];
3616                 r = radeon_ring_lock(rdev, ring, 2);
3617
3618                 /* clear the compute context state */
3619                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3620                 radeon_ring_write(ring, 0);
3621
3622                 radeon_ring_unlock_commit(rdev, ring, false);
3623         }
3624
3625         return 0;
3626 }
3627
3628 static void si_cp_fini(struct radeon_device *rdev)
3629 {
3630         struct radeon_ring *ring;
3631         si_cp_enable(rdev, false);
3632
3633         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3634         radeon_ring_fini(rdev, ring);
3635         radeon_scratch_free(rdev, ring->rptr_save_reg);
3636
3637         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3638         radeon_ring_fini(rdev, ring);
3639         radeon_scratch_free(rdev, ring->rptr_save_reg);
3640
3641         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3642         radeon_ring_fini(rdev, ring);
3643         radeon_scratch_free(rdev, ring->rptr_save_reg);
3644 }
3645
/*
 * si_cp_resume - program and start the three CP ring buffers
 *
 * Programs ring buffer size, read/write pointers, writeback addresses and
 * base for the GFX ring (RB0) and the two compute rings (RB1/RB2), then
 * starts the CP via si_cp_start() and ring-tests each ring.  A GFX ring
 * test failure is fatal (all rings marked not ready); a compute ring test
 * failure only disables that ring.
 *
 * Returns 0 on success, a negative error code if the GFX ring fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	/* size is encoded as log2 of qwords; upper byte is the rptr
	 * writeback block period */
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: disable rptr updates to memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* GFX ring failure takes all rings down */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	/* accel is back: expose the full VRAM size again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3771
/*
 * si_gpu_check_soft_reset - determine which GPU blocks are hung
 *
 * Reads the GRBM/SRBM/DMA/VM status registers and builds a mask of
 * RADEON_RESET_* flags for each block that reports busy or has pending
 * requests.  An MC busy indication is cleared from the mask since the MC
 * is most likely just busy, not hung.
 *
 * Returns the reset mask (0 means nothing appears hung).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3852
/*
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 *
 * Disables PG/CG and the RLC, halts the CP and DMA engines, stops the MC,
 * then pulses the matching GRBM/SRBM soft-reset bits (set, read back,
 * wait, clear, read back) before resuming the MC.  No-op when
 * @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map each RADEON_RESET_* flag to GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* pulse the reset bits; the readbacks flush the writes */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3984
/*
 * si_set_clk_bypass_mode - switch sclk/mclk to their bypass sources
 *
 * Enables SPLL bypass, requests the clock mux change and polls
 * SPLL_STATUS for the change to take effect (bounded by usec_timeout),
 * then clears the request bits and deselects the MPLL as the mclk
 * source.  Used before a PCI config reset so the PLLs can be powered
 * down safely.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the mux change to complete */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
4011
/*
 * si_spll_powerdown - put the SPLL into reset/sleep
 *
 * Takes software control of the SPLL, asserts reset then sleep, and
 * returns control to hardware.  Must only be called after the clocks
 * have been switched to bypass (see si_set_clk_bypass_mode()).
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
4032
/*
 * si_gpu_pci_config_reset - full ASIC reset via PCI config space
 *
 * Quiesces the chip (PG/CG off, CP and both DMA engines halted, RLC
 * stopped, MC stopped), switches the clocks to bypass and powers down
 * the SPLL, disables bus mastering, then triggers the PCI config reset
 * and polls CONFIG_MEMSIZE until the ASIC responds again (bounded by
 * usec_timeout).
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads as all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4084
/*
 * si_asic_reset - reset the ASIC
 *
 * @rdev: radeon_device pointer
 * @hard: force a PCI config reset instead of trying a soft reset first
 *
 * Tries a soft reset of the blocks reported hung by
 * si_gpu_check_soft_reset(); if blocks remain hung afterwards and the
 * radeon_hard_reset module parameter is set, escalates to a PCI config
 * reset.  The BIOS scratch "engine hung" flag is set while a reset is
 * in progress and cleared once no blocks report hung.
 *
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		si_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4115
4116 /**
4117  * si_gfx_is_lockup - Check if the GFX engine is locked up
4118  *
4119  * @rdev: radeon_device pointer
4120  * @ring: radeon_ring structure holding ring information
4121  *
4122  * Check if the GFX engine is locked up.
4123  * Returns true if the engine appears to be locked up, false if not.
4124  */
4125 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4126 {
4127         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4128
4129         if (!(reset_mask & (RADEON_RESET_GFX |
4130                             RADEON_RESET_COMPUTE |
4131                             RADEON_RESET_CP))) {
4132                 radeon_ring_lockup_update(rdev, ring);
4133                 return false;
4134         }
4135         return radeon_ring_test_lockup(rdev, ring);
4136 }
4137
4138 /* MC */
/* MC */
/*
 * si_mc_program - program the memory controller apertures
 *
 * Clears the HDP tiling registers, stops the MC, programs the system
 * aperture, FB location and AGP (disabled) apertures, then resumes the
 * MC.  On parts with display (non-NODCE) it also locks out the VGA
 * aperture and disables the VGA renderer so it cannot scribble on VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (>>24) in the high 16 bits, base in the low */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled: BOT > TOP */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4189
4190 void si_vram_gtt_location(struct radeon_device *rdev,
4191                           struct radeon_mc *mc)
4192 {
4193         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4194                 /* leave room for at least 1024M GTT */
4195                 dev_warn(rdev->dev, "limiting VRAM\n");
4196                 mc->real_vram_size = 0xFFC0000000ULL;
4197                 mc->mc_vram_size = 0xFFC0000000ULL;
4198         }
4199         radeon_vram_location(rdev, &rdev->mc, 0);
4200         rdev->mc.gtt_base_align = 0;
4201         radeon_gtt_location(rdev, mc);
4202 }
4203
/*
 * si_mc_init - probe VRAM configuration
 *
 * Decodes channel size from MC_ARB_RAMCFG and channel count from
 * MC_SHARED_CHMAP to compute the VRAM bus width, reads the VRAM size in
 * MB from CONFIG_MEMSIZE (with a workaround for boards that report
 * garbage in the upper 16 bits), then lays out the VRAM/GTT apertures
 * and updates the bandwidth info.
 *
 * Always returns 0.
 */
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	/* NOOFCHAN field -> number of memory channels */
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	tmp = RREG32(CONFIG_MEMSIZE);
	/* some boards may have garbage in the upper 16 bits */
	if (tmp & 0xffff0000) {
		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
		/* only mask if the low half looks like a usable size */
		if (tmp & 0xffff)
			tmp &= 0xffff;
	}
	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
4270
4271 /*
4272  * GART
4273  */
/*
 * si_pcie_gart_tlb_flush - flush the HDP cache and invalidate the
 * VM context 0 TLB so subsequent GART accesses see updated page
 * table entries.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4282
4283 static int si_pcie_gart_enable(struct radeon_device *rdev)
4284 {
4285         int r, i;
4286
4287         if (rdev->gart.robj == NULL) {
4288                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4289                 return -EINVAL;
4290         }
4291         r = radeon_gart_table_vram_pin(rdev);
4292         if (r)
4293                 return r;
4294         /* Setup TLB control */
4295         WREG32(MC_VM_MX_L1_TLB_CNTL,
4296                (0xA << 7) |
4297                ENABLE_L1_TLB |
4298                ENABLE_L1_FRAGMENT_PROCESSING |
4299                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4300                ENABLE_ADVANCED_DRIVER_MODEL |
4301                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4302         /* Setup L2 cache */
4303         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4304                ENABLE_L2_FRAGMENT_PROCESSING |
4305                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4306                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4307                EFFECTIVE_L2_QUEUE_SIZE(7) |
4308                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4309         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4310         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4311                BANK_SELECT(4) |
4312                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4313         /* setup context0 */
4314         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4315         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4316         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4317         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4318                         (u32)(rdev->dummy_page.addr >> 12));
4319         WREG32(VM_CONTEXT0_CNTL2, 0);
4320         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4321                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4322
4323         WREG32(0x15D4, 0);
4324         WREG32(0x15D8, 0);
4325         WREG32(0x15DC, 0);
4326
4327         /* empty context1-15 */
4328         /* set vm size, must be a multiple of 4 */
4329         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4330         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4331         /* Assign the pt base to something valid for now; the pts used for
4332          * the VMs are determined by the application and setup and assigned
4333          * on the fly in the vm part of radeon_gart.c
4334          */
4335         for (i = 1; i < 16; i++) {
4336                 if (i < 8)
4337                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4338                                rdev->vm_manager.saved_table_addr[i]);
4339                 else
4340                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4341                                rdev->vm_manager.saved_table_addr[i]);
4342         }
4343
4344         /* enable context1-15 */
4345         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4346                (u32)(rdev->dummy_page.addr >> 12));
4347         WREG32(VM_CONTEXT1_CNTL2, 4);
4348         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4349                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4350                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4351                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4352                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4353                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4354                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4355                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4356                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4358                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4360                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4361                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4362
4363         si_pcie_gart_tlb_flush(rdev);
4364         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4365                  (unsigned)(rdev->mc.gtt_size >> 20),
4366                  (unsigned long long)rdev->gart.table_addr);
4367         rdev->gart.ready = true;
4368         return 0;
4369 }
4370
/**
 * si_pcie_gart_disable - shut down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Saves the per-context page table base registers, disables the VM
 * contexts and drops the TLB/L2 setup back to a pass-through
 * configuration, then unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* Save the page table bases of contexts 1-15 so they can be
	 * restored on the next enable (the enable path reads
	 * vm_manager.saved_table_addr[] back into these registers).
	 * Contexts 0-7 and 8-15 live in two separate register banks.
	 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4400
/**
 * si_pcie_gart_fini - final teardown of the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the page table BO in VRAM and
 * releases the gart bookkeeping structures.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4407
4408 /* vm parser */
4409 static bool si_vm_reg_valid(u32 reg)
4410 {
4411         /* context regs are fine */
4412         if (reg >= 0x28000)
4413                 return true;
4414
4415         /* shader regs are also fine */
4416         if (reg >= 0xB000 && reg < 0xC000)
4417                 return true;
4418
4419         /* check config regs */
4420         switch (reg) {
4421         case GRBM_GFX_INDEX:
4422         case CP_STRMOUT_CNTL:
4423         case VGT_VTX_VECT_EJECT_REG:
4424         case VGT_CACHE_INVALIDATION:
4425         case VGT_ESGS_RING_SIZE:
4426         case VGT_GSVS_RING_SIZE:
4427         case VGT_GS_VERTEX_REUSE:
4428         case VGT_PRIMITIVE_TYPE:
4429         case VGT_INDEX_TYPE:
4430         case VGT_NUM_INDICES:
4431         case VGT_NUM_INSTANCES:
4432         case VGT_TF_RING_SIZE:
4433         case VGT_HS_OFFCHIP_PARAM:
4434         case VGT_TF_MEMORY_BASE:
4435         case PA_CL_ENHANCE:
4436         case PA_SU_LINE_STIPPLE_VALUE:
4437         case PA_SC_LINE_STIPPLE_STATE:
4438         case PA_SC_ENHANCE:
4439         case SQC_CACHES:
4440         case SPI_STATIC_THREAD_MGMT_1:
4441         case SPI_STATIC_THREAD_MGMT_2:
4442         case SPI_STATIC_THREAD_MGMT_3:
4443         case SPI_PS_MAX_WAVE_ID:
4444         case SPI_CONFIG_CNTL:
4445         case SPI_CONFIG_CNTL_1:
4446         case TA_CNTL_AUX:
4447         case TA_CS_BC_BASE_ADDR:
4448                 return true;
4449         default:
4450                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4451                 return false;
4452         }
4453 }
4454
4455 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4456                                   u32 *ib, struct radeon_cs_packet *pkt)
4457 {
4458         switch (pkt->opcode) {
4459         case PACKET3_NOP:
4460         case PACKET3_SET_BASE:
4461         case PACKET3_SET_CE_DE_COUNTERS:
4462         case PACKET3_LOAD_CONST_RAM:
4463         case PACKET3_WRITE_CONST_RAM:
4464         case PACKET3_WRITE_CONST_RAM_OFFSET:
4465         case PACKET3_DUMP_CONST_RAM:
4466         case PACKET3_INCREMENT_CE_COUNTER:
4467         case PACKET3_WAIT_ON_DE_COUNTER:
4468         case PACKET3_CE_WRITE:
4469                 break;
4470         default:
4471                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4472                 return -EINVAL;
4473         }
4474         return 0;
4475 }
4476
4477 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4478 {
4479         u32 start_reg, reg, i;
4480         u32 command = ib[idx + 4];
4481         u32 info = ib[idx + 1];
4482         u32 idx_value = ib[idx];
4483         if (command & PACKET3_CP_DMA_CMD_SAS) {
4484                 /* src address space is register */
4485                 if (((info & 0x60000000) >> 29) == 0) {
4486                         start_reg = idx_value << 2;
4487                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4488                                 reg = start_reg;
4489                                 if (!si_vm_reg_valid(reg)) {
4490                                         DRM_ERROR("CP DMA Bad SRC register\n");
4491                                         return -EINVAL;
4492                                 }
4493                         } else {
4494                                 for (i = 0; i < (command & 0x1fffff); i++) {
4495                                         reg = start_reg + (4 * i);
4496                                         if (!si_vm_reg_valid(reg)) {
4497                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4498                                                 return -EINVAL;
4499                                         }
4500                                 }
4501                         }
4502                 }
4503         }
4504         if (command & PACKET3_CP_DMA_CMD_DAS) {
4505                 /* dst address space is register */
4506                 if (((info & 0x00300000) >> 20) == 0) {
4507                         start_reg = ib[idx + 2];
4508                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4509                                 reg = start_reg;
4510                                 if (!si_vm_reg_valid(reg)) {
4511                                         DRM_ERROR("CP DMA Bad DST register\n");
4512                                         return -EINVAL;
4513                                 }
4514                         } else {
4515                                 for (i = 0; i < (command & 0x1fffff); i++) {
4516                                         reg = start_reg + (4 * i);
4517                                 if (!si_vm_reg_valid(reg)) {
4518                                                 DRM_ERROR("CP DMA Bad DST register\n");
4519                                                 return -EINVAL;
4520                                         }
4521                                 }
4522                         }
4523                 }
4524         }
4525         return 0;
4526 }
4527
/**
 * si_vm_packet3_gfx_check - validate a type-3 packet on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Packets on the whitelist pass unchecked; packets that can write to
 * registers additionally have their target register(s) validated with
 * si_vm_reg_valid().  Unknown opcodes are rejected.
 * Returns 0 on success, -EINVAL on a disallowed packet or register.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* harmless / self-contained packets: always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-select field zero -> destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest-select field zero -> writes land in registers */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register (no-increment) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* payload is pkt->count - 2 consecutive regs */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> conditional register write */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst bit set -> copies a dword into a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the window, then each register in it */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4645
/**
 * si_vm_packet3_compute_check - validate a type-3 packet on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Same scheme as si_vm_packet3_gfx_check() but with the smaller compute
 * whitelist (no draw/index packets).  Register-writing packets have
 * their targets validated with si_vm_reg_valid().
 * Returns 0 on success, -EINVAL on a disallowed packet or register.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* harmless / self-contained packets: always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-select field zero -> destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dest-select field zero -> writes land in registers */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register (no-increment) mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* payload is pkt->count - 2 consecutive regs */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> conditional register write */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst bit set -> copies a dword into a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4733
/**
 * si_ib_parse - validate an indirect buffer submitted by a VM client
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walks every packet in the IB.  Type-0 (register write) packets are
 * forbidden outright; type-2 packets are one-dword padding; type-3
 * packets are dispatched to the per-ring checker (CE, GFX or compute).
 * On the first invalid packet the whole IB is dumped with a marker at
 * the offending dword and -EINVAL is returned; returns 0 on success.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* direct register writes bypass the VM checks */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a single padding dword */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				/* const IBs run on the constant engine */
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the IB, flagging the dword that failed */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4792
4793 /*
4794  * vm
4795  */
/**
 * si_vm_init - set up the VM manager parameters
 *
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 hardware VM contexts; context 0 is programmed for the
 * kernel GART in si_pcie_gart_enable(), leaving 1-15 for clients.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4805
/**
 * si_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; kept so the asic function table has a
 * consistent init/fini pair.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4809
4810 /**
4811  * si_vm_decode_fault - print human readable fault info
4812  *
4813  * @rdev: radeon_device pointer
4814  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4815  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4816  *
4817  * Print human readable fault information (SI).
4818  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* Decode the fault status fields, then translate the memory
	 * client id into a human readable block name.  The mc_id ->
	 * block mapping differs between Tahiti and the other SI parts,
	 * hence the two lookup tables below.
	 */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	if (rdev->family == CHIP_TAHITI) {
		/* Tahiti mc_id -> block table */
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* mc_id -> block table for the other SI asics */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
5073
/**
 * si_vm_flush - flush the VM TLB via packets on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id (0-15) to flush
 * @pd_addr: physical base address of the new page directory
 *
 * Emits WRITE_DATA packets that point the context at the new page
 * directory, flush the HDP cache and request a TLB invalidate for the
 * context, then polls until the invalidate completes and finally syncs
 * PFP to ME.  The exact packet order is part of the hardware contract.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 sit in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5122
5123 /*
5124  *  Power and clock gating
5125  */
5126 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5127 {
5128         int i;
5129
5130         for (i = 0; i < rdev->usec_timeout; i++) {
5131                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5132                         break;
5133                 udelay(1);
5134         }
5135
5136         for (i = 0; i < rdev->usec_timeout; i++) {
5137                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5138                         break;
5139                 udelay(1);
5140         }
5141 }
5142
/*
 * si_enable_gui_idle_interrupt - toggle the CP context busy/empty interrupts
 * @rdev: radeon_device pointer
 * @enable: true to enable the interrupt sources, false to disable
 *
 * On disable, additionally polls RLC_STAT until only the gfx clock/power
 * status bits remain set (i.e. the gfx block has settled).
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			/* wait until only the clock/power status bits are set */
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5168
5169 static void si_set_uvd_dcm(struct radeon_device *rdev,
5170                            bool sw_mode)
5171 {
5172         u32 tmp, tmp2;
5173
5174         tmp = RREG32(UVD_CGC_CTRL);
5175         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5176         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5177
5178         if (sw_mode) {
5179                 tmp &= ~0x7ffff800;
5180                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5181         } else {
5182                 tmp |= 0x7ffff800;
5183                 tmp2 = 0;
5184         }
5185
5186         WREG32(UVD_CGC_CTRL, tmp);
5187         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5188 }
5189
5190 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5191 {
5192         bool hw_mode = true;
5193
5194         if (hw_mode) {
5195                 si_set_uvd_dcm(rdev, false);
5196         } else {
5197                 u32 tmp = RREG32(UVD_CGC_CTRL);
5198                 tmp &= ~DCM;
5199                 WREG32(UVD_CGC_CTRL, tmp);
5200         }
5201 }
5202
5203 static u32 si_halt_rlc(struct radeon_device *rdev)
5204 {
5205         u32 data, orig;
5206
5207         orig = data = RREG32(RLC_CNTL);
5208
5209         if (data & RLC_ENABLE) {
5210                 data &= ~RLC_ENABLE;
5211                 WREG32(RLC_CNTL, data);
5212
5213                 si_wait_for_rlc_serdes(rdev);
5214         }
5215
5216         return orig;
5217 }
5218
5219 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5220 {
5221         u32 tmp;
5222
5223         tmp = RREG32(RLC_CNTL);
5224         if (tmp != rlc)
5225                 WREG32(RLC_CNTL, rlc);
5226 }
5227
5228 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5229 {
5230         u32 data, orig;
5231
5232         orig = data = RREG32(DMA_PG);
5233         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5234                 data |= PG_CNTL_ENABLE;
5235         else
5236                 data &= ~PG_CNTL_ENABLE;
5237         if (orig != data)
5238                 WREG32(DMA_PG, data);
5239 }
5240
5241 static void si_init_dma_pg(struct radeon_device *rdev)
5242 {
5243         u32 tmp;
5244
5245         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5246         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5247
5248         for (tmp = 0; tmp < 5; tmp++)
5249                 WREG32(DMA_PGFSM_WRITE, 0);
5250 }
5251
/*
 * si_enable_gfx_cgpg - enable/disable gfx powergating via the RLC
 * @rdev: radeon_device pointer
 * @enable: true to enable (only takes effect with RADEON_PG_SUPPORT_GFX_PG)
 *
 * Enable programs the RLC power up/down delay thresholds, sets
 * GFX_PG_ENABLE and turns on automatic powergating.  Disable clears
 * the auto-PG bit and issues a dummy gfx register read.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power up/down delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read; the value is intentionally unused */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5276
/*
 * si_init_gfx_cgpg - initialize gfx powergating state
 * @rdev: radeon_device pointer
 *
 * Points the RLC at the save/restore and clear-state buffers (256-byte
 * aligned GPU addresses, hence the >> 8) and programs the auto-PG
 * GRBM sample-gap timing.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* save/restore buffer comes from gfx (SRC bit set) */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM sample-gap interval timer, PG-after-sample delay cleared */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5296
5297 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5298 {
5299         u32 mask = 0, tmp, tmp1;
5300         int i;
5301
5302         si_select_se_sh(rdev, se, sh);
5303         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5304         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5305         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5306
5307         tmp &= 0xffff0000;
5308
5309         tmp |= tmp1;
5310         tmp >>= 16;
5311
5312         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5313                 mask <<= 1;
5314                 mask |= 1;
5315         }
5316
5317         return (~tmp) & mask;
5318 }
5319
5320 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5321 {
5322         u32 i, j, k, active_cu_number = 0;
5323         u32 mask, counter, cu_bitmap;
5324         u32 tmp = 0;
5325
5326         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5327                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5328                         mask = 1;
5329                         cu_bitmap = 0;
5330                         counter  = 0;
5331                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5332                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5333                                         if (counter < 2)
5334                                                 cu_bitmap |= mask;
5335                                         counter++;
5336                                 }
5337                                 mask <<= 1;
5338                         }
5339
5340                         active_cu_number += counter;
5341                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5342                 }
5343         }
5344
5345         WREG32(RLC_PG_AO_CU_MASK, tmp);
5346
5347         tmp = RREG32(RLC_MAX_PG_CU);
5348         tmp &= ~MAX_PU_CU_MASK;
5349         tmp |= MAX_PU_CU(active_cu_number);
5350         WREG32(RLC_MAX_PG_CU, tmp);
5351 }
5352
/*
 * si_enable_cgcg - enable/disable gfx coarse-grain clock gating
 * @rdev: radeon_device pointer
 * @enable: enable/disable (only takes effect with RADEON_CG_SUPPORT_GFX_CGCG)
 *
 * The enable path halts the RLC, broadcasts a serdes write to all
 * SEs/SHs, waits for the serdes to settle and restores the RLC before
 * setting CGCG/CGLS.  The statement order is significant.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC while programming the serdes */
		tmp = si_halt_rlc(rdev);

		/* broadcast to all SE/SH serdes masters */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads; results intentionally discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5392
/*
 * si_enable_mgcg - enable/disable gfx medium-grain clock gating
 * @rdev: radeon_device pointer
 * @enable: enable/disable (only takes effect with RADEON_CG_SUPPORT_GFX_MGCG)
 *
 * Programs the shader-module gating register, optional CP memory
 * light-sleep, the RLC MGCG override mask and a broadcast serdes
 * write (with the RLC halted around it).  Statement order matters.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light sleep, when supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low MGCG override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast serdes write to all SEs/SHs */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the MGCG override on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off CP memory light sleep if it is on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5448
5449 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5450                                bool enable)
5451 {
5452         u32 orig, data, tmp;
5453
5454         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5455                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5456                 tmp |= 0x3fff;
5457                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5458
5459                 orig = data = RREG32(UVD_CGC_CTRL);
5460                 data |= DCM;
5461                 if (orig != data)
5462                         WREG32(UVD_CGC_CTRL, data);
5463
5464                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5465                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5466         } else {
5467                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5468                 tmp &= ~0x3fff;
5469                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5470
5471                 orig = data = RREG32(UVD_CGC_CTRL);
5472                 data &= ~DCM;
5473                 if (orig != data)
5474                         WREG32(UVD_CGC_CTRL, data);
5475
5476                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5477                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5478         }
5479 }
5480
/* Memory-controller / VM clock-gating control registers walked by both
 * si_enable_mc_mgcg() and si_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5493
5494 static void si_enable_mc_ls(struct radeon_device *rdev,
5495                             bool enable)
5496 {
5497         int i;
5498         u32 orig, data;
5499
5500         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5501                 orig = data = RREG32(mc_cg_registers[i]);
5502                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5503                         data |= MC_LS_ENABLE;
5504                 else
5505                         data &= ~MC_LS_ENABLE;
5506                 if (data != orig)
5507                         WREG32(mc_cg_registers[i], data);
5508         }
5509 }
5510
5511 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5512                                bool enable)
5513 {
5514         int i;
5515         u32 orig, data;
5516
5517         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5518                 orig = data = RREG32(mc_cg_registers[i]);
5519                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5520                         data |= MC_CG_ENABLE;
5521                 else
5522                         data &= ~MC_CG_ENABLE;
5523                 if (data != orig)
5524                         WREG32(mc_cg_registers[i], data);
5525         }
5526 }
5527
5528 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5529                                bool enable)
5530 {
5531         u32 orig, data, offset;
5532         int i;
5533
5534         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5535                 for (i = 0; i < 2; i++) {
5536                         if (i == 0)
5537                                 offset = DMA0_REGISTER_OFFSET;
5538                         else
5539                                 offset = DMA1_REGISTER_OFFSET;
5540                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5541                         data &= ~MEM_POWER_OVERRIDE;
5542                         if (data != orig)
5543                                 WREG32(DMA_POWER_CNTL + offset, data);
5544                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5545                 }
5546         } else {
5547                 for (i = 0; i < 2; i++) {
5548                         if (i == 0)
5549                                 offset = DMA0_REGISTER_OFFSET;
5550                         else
5551                                 offset = DMA1_REGISTER_OFFSET;
5552                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5553                         data |= MEM_POWER_OVERRIDE;
5554                         if (data != orig)
5555                                 WREG32(DMA_POWER_CNTL + offset, data);
5556
5557                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5558                         data = 0xff000000;
5559                         if (data != orig)
5560                                 WREG32(DMA_CLK_CTRL + offset, data);
5561                 }
5562         }
5563 }
5564
5565 static void si_enable_bif_mgls(struct radeon_device *rdev,
5566                                bool enable)
5567 {
5568         u32 orig, data;
5569
5570         orig = data = RREG32_PCIE(PCIE_CNTL2);
5571
5572         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5573                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5574                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5575         else
5576                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5577                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5578
5579         if (orig != data)
5580                 WREG32_PCIE(PCIE_CNTL2, data);
5581 }
5582
5583 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5584                                bool enable)
5585 {
5586         u32 orig, data;
5587
5588         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5589
5590         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5591                 data &= ~CLOCK_GATING_DIS;
5592         else
5593                 data |= CLOCK_GATING_DIS;
5594
5595         if (orig != data)
5596                 WREG32(HDP_HOST_PATH_CNTL, data);
5597 }
5598
5599 static void si_enable_hdp_ls(struct radeon_device *rdev,
5600                              bool enable)
5601 {
5602         u32 orig, data;
5603
5604         orig = data = RREG32(HDP_MEM_POWER_LS);
5605
5606         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5607                 data |= HDP_LS_ENABLE;
5608         else
5609                 data &= ~HDP_LS_ENABLE;
5610
5611         if (orig != data)
5612                 WREG32(HDP_MEM_POWER_LS, data);
5613 }
5614
/*
 * si_update_cg - enable/disable clock gating for a set of IP blocks
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable gating for those blocks
 *
 * For gfx, MGCG must be enabled before CGCG and disabled after it;
 * the GUI idle interrupt is masked for the duration of the switch.
 */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* UVD gating only applies when the ASIC has a UVD block */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5655
/*
 * si_init_cg - enable clock gating on all supported blocks
 *
 * UVD is handled separately (and its internal gating initialized)
 * only when the ASIC has a UVD block.
 */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5668
/*
 * si_fini_cg - disable clock gating on all blocks (reverse of si_init_cg)
 *
 * UVD gating is torn down first, then the remaining blocks.
 */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5680
5681 u32 si_get_csb_size(struct radeon_device *rdev)
5682 {
5683         u32 count = 0;
5684         const struct cs_section_def *sect = NULL;
5685         const struct cs_extent_def *ext = NULL;
5686
5687         if (rdev->rlc.cs_data == NULL)
5688                 return 0;
5689
5690         /* begin clear state */
5691         count += 2;
5692         /* context control state */
5693         count += 3;
5694
5695         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5696                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5697                         if (sect->id == SECT_CONTEXT)
5698                                 count += 2 + ext->reg_count;
5699                         else
5700                                 return 0;
5701                 }
5702         }
5703         /* pa_sc_raster_config */
5704         count += 3;
5705         /* end clear state */
5706         count += 2;
5707         /* clear state */
5708         count += 2;
5709
5710         return count;
5711 }
5712
/*
 * si_get_csb_buffer - build the clear-state indirect buffer
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the PM4 clear-state preamble, all SECT_CONTEXT register
 * extents from rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG value and
 * the trailing CLEAR_STATE packet.  The dword count must match
 * si_get_csb_size(); keep the two functions in lock-step.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context registers are relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT is supported here */
				return;
			}
		}
	}

	/* per-ASIC raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5772
/*
 * si_init_pg - initialize powergating
 * @rdev: radeon_device pointer
 *
 * With powergating supported: set up DMA PG, the always-on CU mask and
 * gfx PG (or just the RLC buffer addresses when gfx PG is absent),
 * then enable DMA and gfx powergating.  Without support, only the RLC
 * save/restore and clear-state buffer addresses are programmed.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			/* no gfx PG: still point the RLC at its buffers */
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5793
5794 static void si_fini_pg(struct radeon_device *rdev)
5795 {
5796         if (rdev->pg_flags) {
5797                 si_enable_dma_pg(rdev, false);
5798                 si_enable_gfx_cgpg(rdev, false);
5799         }
5800 }
5801
5802 /*
5803  * RLC
5804  */
/*
 * si_rlc_reset - pulse the RLC soft-reset bit
 *
 * Asserts SOFT_RESET_RLC in GRBM_SOFT_RESET, holds it for 50us,
 * releases it and waits another 50us for the block to come back.
 */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5816
/*
 * si_rlc_stop - halt the RLC
 *
 * Clears RLC_CNTL, masks the GUI idle interrupt and waits for the
 * serdes to go idle.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5825
/*
 * si_rlc_start - enable the RLC and re-enable the GUI idle interrupt
 *
 * The trailing 50us delay gives the RLC time to start running.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5834
5835 static bool si_lbpw_supported(struct radeon_device *rdev)
5836 {
5837         u32 tmp;
5838
5839         /* Enable LBPW only for DDR3 */
5840         tmp = RREG32(MC_SEQ_MISC0);
5841         if ((tmp & 0xF0000000) == 0xB0000000)
5842                 return true;
5843         return false;
5844 }
5845
5846 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5847 {
5848         u32 tmp;
5849
5850         tmp = RREG32(RLC_LB_CNTL);
5851         if (enable)
5852                 tmp |= LOAD_BALANCE_ENABLE;
5853         else
5854                 tmp &= ~LOAD_BALANCE_ENABLE;
5855         WREG32(RLC_LB_CNTL, tmp);
5856
5857         if (!enable) {
5858                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5859                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5860         }
5861 }
5862
/*
 * si_rlc_resume - load the RLC microcode and start the RLC
 * @rdev: radeon_device pointer
 *
 * Stops and resets the RLC, initializes power/clock gating, clears
 * the RLC ring and load-balancing registers, uploads the microcode
 * word-by-word (handling both the new header-based and the legacy raw
 * firmware layouts) and restarts the RLC.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new layout: header followed by little-endian ucode words */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy layout: raw big-endian words, fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5917
5918 static void si_enable_interrupts(struct radeon_device *rdev)
5919 {
5920         u32 ih_cntl = RREG32(IH_CNTL);
5921         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5922
5923         ih_cntl |= ENABLE_INTR;
5924         ih_rb_cntl |= IH_RB_ENABLE;
5925         WREG32(IH_CNTL, ih_cntl);
5926         WREG32(IH_RB_CNTL, ih_rb_cntl);
5927         rdev->ih.enabled = true;
5928 }
5929
5930 static void si_disable_interrupts(struct radeon_device *rdev)
5931 {
5932         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5933         u32 ih_cntl = RREG32(IH_CNTL);
5934
5935         ih_rb_cntl &= ~IH_RB_ENABLE;
5936         ih_cntl &= ~ENABLE_INTR;
5937         WREG32(IH_RB_CNTL, ih_rb_cntl);
5938         WREG32(IH_CNTL, ih_cntl);
5939         /* set rptr, wptr to 0 */
5940         WREG32(IH_RB_RPTR, 0);
5941         WREG32(IH_RB_WPTR, 0);
5942         rdev->ih.enabled = false;
5943         rdev->ih.rptr = 0;
5944 }
5945
/*
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon_device pointer
 *
 * Clears the CP ring, DMA trap, GRBM/SRBM, per-CRTC vblank/pageflip
 * and (on ASICs with a display block) DAC/HPD interrupt enables.
 * The HPD polarity bits are preserved while the enables are cleared.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* DMA engine trap interrupts */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	/* per-CRTC display interrupts, only for CRTCs that exist */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD interrupt enables but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
6004
/**
 * si_irq_init - allocate and enable the IH (interrupt handler) ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring buffer, resumes the RLC, programs the
 * interrupt controller and IH ring registers (base, size, write-back
 * address, rptr/wptr), then enables interrupt delivery.
 * Returns 0 on success, negative error code on failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up: release the IH ring again */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size as log2 of the dword count */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6075
/**
 * si_irq_set - program the hardware interrupt enables
 *
 * @rdev: radeon_device pointer
 *
 * Builds enable masks for the CP rings, the two DMA engines, CRTC
 * vblank/pageflip, HPD and thermal interrupts from the software state
 * in rdev->irq, then writes them to the hardware registers in one pass.
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* keep the context busy/empty enables, clear everything else */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		/* start from the current HPD settings with the enable bits cleared */
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	}

	/* DMA trap enables, cleared by default */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts are requested either by drm vblank or by pageflips */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* write everything back out */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always unmasked on present crtcs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6255
/**
 * si_irq_ack - latch and acknowledge pending display interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Snapshots the display interrupt status registers into
 * rdev->irq.stat_regs.evergreen and acknowledges (clears) any pending
 * pageflip, vblank, vline, HPD and HPD RX interrupts so they can fire
 * again.  No-op on ASICs without display hardware.
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	if (ASIC_IS_NODCE(rdev))
		return;

	/* latch all display interrupt status registers for later processing */
	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pageflip / vblank / vline for CRTC 0 and 1 */
	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* ack pageflip / vblank / vline for CRTC 2 and 3, if present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack pageflip / vblank / vline for CRTC 4 and 5, if present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack HPD (hot plug detect) interrupts */
	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}

	/* ack HPD RX interrupts */
	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
6385
/**
 * si_irq_disable - disable interrupt delivery
 *
 * @rdev: radeon_device pointer
 *
 * Disables the IH ring, waits briefly for in-flight interrupts to
 * land, then acknowledges anything still pending and forces all
 * interrupt sources to the disabled state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6394
/**
 * si_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupt delivery and stops the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6400
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt processing and frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6406
6407 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6408 {
6409         u32 wptr, tmp;
6410
6411         if (rdev->wb.enabled)
6412                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6413         else
6414                 wptr = RREG32(IH_RB_WPTR);
6415
6416         if (wptr & RB_OVERFLOW) {
6417                 wptr &= ~RB_OVERFLOW;
6418                 /* When a ring buffer overflow happen start parsing interrupt
6419                  * from the last not overwritten vector (wptr + 16). Hopefully
6420                  * this should allow us to catchup.
6421                  */
6422                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6423                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6424                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6425                 tmp = RREG32(IH_RB_CNTL);
6426                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6427                 WREG32(IH_RB_CNTL, tmp);
6428         }
6429         return (wptr & rdev->ih.ptr_mask);
6430 }
6431
6432 /*        SI IV Ring
6433  * Each IV ring entry is 128 bits:
6434  * [7:0]    - interrupt source id
6435  * [31:8]   - reserved
6436  * [59:32]  - interrupt source data
6437  * [63:60]  - reserved
6438  * [71:64]  - RINGID
6439  * [79:72]  - VMID
6440  * [127:80] - reserved
6441  */
6442 irqreturn_t si_irq_process(struct radeon_device *rdev)
6443 {
6444         u32 wptr;
6445         u32 rptr;
6446         u32 src_id, src_data, ring_id;
6447         u32 ring_index;
6448         bool queue_hotplug = false;
6449         bool queue_dp = false;
6450         bool queue_thermal = false;
6451         u32 status, addr;
6452
6453         if (!rdev->ih.enabled || rdev->shutdown)
6454                 return IRQ_NONE;
6455
6456         wptr = si_get_ih_wptr(rdev);
6457
6458 restart_ih:
6459         /* is somebody else already processing irqs? */
6460         if (atomic_xchg(&rdev->ih.lock, 1))
6461                 return IRQ_NONE;
6462
6463         rptr = rdev->ih.rptr;
6464         DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6465
6466         /* Order reading of wptr vs. reading of IH ring data */
6467         rmb();
6468
6469         /* display interrupts */
6470         si_irq_ack(rdev);
6471
6472         while (rptr != wptr) {
6473                 /* wptr/rptr are in bytes! */
6474                 ring_index = rptr / 4;
6475                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6476                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6477                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6478
6479                 switch (src_id) {
6480                 case 1: /* D1 vblank/vline */
6481                         switch (src_data) {
6482                         case 0: /* D1 vblank */
6483                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6484                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6485
6486                                 if (rdev->irq.crtc_vblank_int[0]) {
6487                                         drm_handle_vblank(rdev->ddev, 0);
6488                                         rdev->pm.vblank_sync = true;
6489                                         wake_up(&rdev->irq.vblank_queue);
6490                                 }
6491                                 if (atomic_read(&rdev->irq.pflip[0]))
6492                                         radeon_crtc_handle_vblank(rdev, 0);
6493                                 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6494                                 DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6495
6496                                 break;
6497                         case 1: /* D1 vline */
6498                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6499                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6500
6501                                 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6502                                 DRM_DEBUG_VBLANK("IH: D1 vline\n");
6503
6504                                 break;
6505                         default:
6506                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6507                                 break;
6508                         }
6509                         break;
6510                 case 2: /* D2 vblank/vline */
6511                         switch (src_data) {
6512                         case 0: /* D2 vblank */
6513                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6514                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6515
6516                                 if (rdev->irq.crtc_vblank_int[1]) {
6517                                         drm_handle_vblank(rdev->ddev, 1);
6518                                         rdev->pm.vblank_sync = true;
6519                                         wake_up(&rdev->irq.vblank_queue);
6520                                 }
6521                                 if (atomic_read(&rdev->irq.pflip[1]))
6522                                         radeon_crtc_handle_vblank(rdev, 1);
6523                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6524                                 DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6525
6526                                 break;
6527                         case 1: /* D2 vline */
6528                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6529                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6530
6531                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6532                                 DRM_DEBUG_VBLANK("IH: D2 vline\n");
6533
6534                                 break;
6535                         default:
6536                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6537                                 break;
6538                         }
6539                         break;
6540                 case 3: /* D3 vblank/vline */
6541                         switch (src_data) {
6542                         case 0: /* D3 vblank */
6543                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6544                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6545
6546                                 if (rdev->irq.crtc_vblank_int[2]) {
6547                                         drm_handle_vblank(rdev->ddev, 2);
6548                                         rdev->pm.vblank_sync = true;
6549                                         wake_up(&rdev->irq.vblank_queue);
6550                                 }
6551                                 if (atomic_read(&rdev->irq.pflip[2]))
6552                                         radeon_crtc_handle_vblank(rdev, 2);
6553                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6554                                 DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6555
6556                                 break;
6557                         case 1: /* D3 vline */
6558                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6559                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6560
6561                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6562                                 DRM_DEBUG_VBLANK("IH: D3 vline\n");
6563
6564                                 break;
6565                         default:
6566                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6567                                 break;
6568                         }
6569                         break;
6570                 case 4: /* D4 vblank/vline */
6571                         switch (src_data) {
6572                         case 0: /* D4 vblank */
6573                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6574                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6575
6576                                 if (rdev->irq.crtc_vblank_int[3]) {
6577                                         drm_handle_vblank(rdev->ddev, 3);
6578                                         rdev->pm.vblank_sync = true;
6579                                         wake_up(&rdev->irq.vblank_queue);
6580                                 }
6581                                 if (atomic_read(&rdev->irq.pflip[3]))
6582                                         radeon_crtc_handle_vblank(rdev, 3);
6583                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6584                                 DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6585
6586                                 break;
6587                         case 1: /* D4 vline */
6588                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6589                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6590
6591                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6592                                 DRM_DEBUG_VBLANK("IH: D4 vline\n");
6593
6594                                 break;
6595                         default:
6596                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6597                                 break;
6598                         }
6599                         break;
6600                 case 5: /* D5 vblank/vline */
6601                         switch (src_data) {
6602                         case 0: /* D5 vblank */
6603                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6604                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6605
6606                                 if (rdev->irq.crtc_vblank_int[4]) {
6607                                         drm_handle_vblank(rdev->ddev, 4);
6608                                         rdev->pm.vblank_sync = true;
6609                                         wake_up(&rdev->irq.vblank_queue);
6610                                 }
6611                                 if (atomic_read(&rdev->irq.pflip[4]))
6612                                         radeon_crtc_handle_vblank(rdev, 4);
6613                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6614                                 DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6615
6616                                 break;
6617                         case 1: /* D5 vline */
6618                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6619                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6620
6621                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6622                                 DRM_DEBUG_VBLANK("IH: D5 vline\n");
6623
6624                                 break;
6625                         default:
6626                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6627                                 break;
6628                         }
6629                         break;
6630                 case 6: /* D6 vblank/vline */
6631                         switch (src_data) {
6632                         case 0: /* D6 vblank */
6633                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6634                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6635
6636                                 if (rdev->irq.crtc_vblank_int[5]) {
6637                                         drm_handle_vblank(rdev->ddev, 5);
6638                                         rdev->pm.vblank_sync = true;
6639                                         wake_up(&rdev->irq.vblank_queue);
6640                                 }
6641                                 if (atomic_read(&rdev->irq.pflip[5]))
6642                                         radeon_crtc_handle_vblank(rdev, 5);
6643                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6644                                 DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6645
6646                                 break;
6647                         case 1: /* D6 vline */
6648                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6649                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6650
6651                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6652                                 DRM_DEBUG_VBLANK("IH: D6 vline\n");
6653
6654                                 break;
6655                         default:
6656                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6657                                 break;
6658                         }
6659                         break;
6660                 case 8: /* D1 page flip */
6661                 case 10: /* D2 page flip */
6662                 case 12: /* D3 page flip */
6663                 case 14: /* D4 page flip */
6664                 case 16: /* D5 page flip */
6665                 case 18: /* D6 page flip */
6666                         DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6667                         if (radeon_use_pflipirq > 0)
6668                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6669                         break;
6670                 case 42: /* HPD hotplug */
6671                         switch (src_data) {
6672                         case 0:
6673                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6674                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6675
6676                                 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6677                                 queue_hotplug = true;
6678                                 DRM_DEBUG("IH: HPD1\n");
6679
6680                                 break;
6681                         case 1:
6682                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6683                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6684
6685                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6686                                 queue_hotplug = true;
6687                                 DRM_DEBUG("IH: HPD2\n");
6688
6689                                 break;
6690                         case 2:
6691                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6692                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6693
6694                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6695                                 queue_hotplug = true;
6696                                 DRM_DEBUG("IH: HPD3\n");
6697
6698                                 break;
6699                         case 3:
6700                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6701                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6702
6703                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6704                                 queue_hotplug = true;
6705                                 DRM_DEBUG("IH: HPD4\n");
6706
6707                                 break;
6708                         case 4:
6709                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6710                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6711
6712                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6713                                 queue_hotplug = true;
6714                                 DRM_DEBUG("IH: HPD5\n");
6715
6716                                 break;
6717                         case 5:
6718                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6719                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6720
6721                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6722                                 queue_hotplug = true;
6723                                 DRM_DEBUG("IH: HPD6\n");
6724
6725                                 break;
6726                         case 6:
6727                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6728                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6729
6730                                 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6731                                 queue_dp = true;
6732                                 DRM_DEBUG("IH: HPD_RX 1\n");
6733
6734                                 break;
6735                         case 7:
6736                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6737                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6738
6739                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6740                                 queue_dp = true;
6741                                 DRM_DEBUG("IH: HPD_RX 2\n");
6742
6743                                 break;
6744                         case 8:
6745                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6746                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6747
6748                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6749                                 queue_dp = true;
6750                                 DRM_DEBUG("IH: HPD_RX 3\n");
6751
6752                                 break;
6753                         case 9:
6754                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6755                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6756
6757                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6758                                 queue_dp = true;
6759                                 DRM_DEBUG("IH: HPD_RX 4\n");
6760
6761                                 break;
6762                         case 10:
6763                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6764                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6765
6766                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6767                                 queue_dp = true;
6768                                 DRM_DEBUG("IH: HPD_RX 5\n");
6769
6770                                 break;
6771                         case 11:
6772                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6773                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6774
6775                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6776                                 queue_dp = true;
6777                                 DRM_DEBUG("IH: HPD_RX 6\n");
6778
6779                                 break;
6780                         default:
6781                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6782                                 break;
6783                         }
6784                         break;
6785                 case 96:
6786                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6787                         WREG32(SRBM_INT_ACK, 0x1);
6788                         break;
6789                 case 124: /* UVD */
6790                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6791                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6792                         break;
6793                 case 146:
6794                 case 147:
6795                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6796                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6797                         /* reset addr and status */
6798                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6799                         if (addr == 0x0 && status == 0x0)
6800                                 break;
6801                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6802                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6803                                 addr);
6804                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6805                                 status);
6806                         si_vm_decode_fault(rdev, status, addr);
6807                         break;
6808                 case 176: /* RINGID0 CP_INT */
6809                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6810                         break;
6811                 case 177: /* RINGID1 CP_INT */
6812                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6813                         break;
6814                 case 178: /* RINGID2 CP_INT */
6815                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6816                         break;
6817                 case 181: /* CP EOP event */
6818                         DRM_DEBUG("IH: CP EOP\n");
6819                         switch (ring_id) {
6820                         case 0:
6821                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6822                                 break;
6823                         case 1:
6824                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6825                                 break;
6826                         case 2:
6827                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6828                                 break;
6829                         }
6830                         break;
6831                 case 224: /* DMA trap event */
6832                         DRM_DEBUG("IH: DMA trap\n");
6833                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6834                         break;
6835                 case 230: /* thermal low to high */
6836                         DRM_DEBUG("IH: thermal low to high\n");
6837                         rdev->pm.dpm.thermal.high_to_low = false;
6838                         queue_thermal = true;
6839                         break;
6840                 case 231: /* thermal high to low */
6841                         DRM_DEBUG("IH: thermal high to low\n");
6842                         rdev->pm.dpm.thermal.high_to_low = true;
6843                         queue_thermal = true;
6844                         break;
6845                 case 233: /* GUI IDLE */
6846                         DRM_DEBUG("IH: GUI idle\n");
6847                         break;
6848                 case 244: /* DMA trap event */
6849                         DRM_DEBUG("IH: DMA1 trap\n");
6850                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6851                         break;
6852                 default:
6853                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6854                         break;
6855                 }
6856
6857                 /* wptr/rptr are in bytes! */
6858                 rptr += 16;
6859                 rptr &= rdev->ih.ptr_mask;
6860                 WREG32(IH_RB_RPTR, rptr);
6861         }
6862         if (queue_dp)
6863                 schedule_work(&rdev->dp_work);
6864         if (queue_hotplug)
6865                 schedule_delayed_work(&rdev->hotplug_work, 0);
6866         if (queue_thermal && rdev->pm.dpm_enabled)
6867                 schedule_work(&rdev->pm.dpm.thermal.work);
6868         rdev->ih.rptr = rptr;
6869         atomic_set(&rdev->ih.lock, 0);
6870
6871         /* make sure wptr hasn't changed while processing */
6872         wptr = si_get_ih_wptr(rdev);
6873         if (wptr != rptr)
6874                 goto restart_ih;
6875
6876         return IRQ_HANDLED;
6877 }
6878
6879 /*
6880  * startup/shutdown callbacks
6881  */
/**
 * si_uvd_init - allocate the UVD ring and set up UVD state
 *
 * @rdev: radeon_device pointer
 *
 * Calls radeon_uvd_init(); on failure UVD support is disabled
 * (rdev->has_uvd cleared) so the rest of startup skips UVD entirely.
 * On success the UVD ring object is prepared for later bring-up.
 */
static void si_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
		 * to early fails uvd_v2_2_resume() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable uvd here.
		 */
		rdev->has_uvd = 0;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}
6904
/**
 * si_uvd_start - resume UVD firmware and start its fence ring
 *
 * @rdev: radeon_device pointer
 *
 * On any failure the UVD ring size is zeroed so that si_uvd_resume()
 * later recognizes UVD as unusable and skips ring bring-up.
 */
static void si_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = uvd_v2_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	/* mark the ring unusable; checked by si_uvd_resume() */
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
6927
/**
 * si_uvd_resume - initialize the UVD ring and engine
 *
 * @rdev: radeon_device pointer
 *
 * Skipped when UVD is absent or when si_uvd_start() marked the ring
 * unusable (ring_size == 0).  Errors are reported but are not fatal
 * to the rest of startup.
 */
static void si_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}
6948
/**
 * si_vce_init - allocate the two VCE rings and set up VCE state
 *
 * @rdev: radeon_device pointer
 *
 * Calls radeon_vce_init(); on failure VCE support is disabled
 * (rdev->has_vce cleared) so the rest of startup skips VCE entirely.
 */
static void si_vce_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
		 * to early fails si_vce_start() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable vce here.
		 */
		rdev->has_vce = 0;
		return;
	}
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
}
6973
6974 static void si_vce_start(struct radeon_device *rdev)
6975 {
6976         int r;
6977
6978         if (!rdev->has_vce)
6979                 return;
6980
6981         r = radeon_vce_resume(rdev);
6982         if (r) {
6983                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6984                 goto error;
6985         }
6986         r = vce_v1_0_resume(rdev);
6987         if (r) {
6988                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6989                 goto error;
6990         }
6991         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6992         if (r) {
6993                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6994                 goto error;
6995         }
6996         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6997         if (r) {
6998                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6999                 goto error;
7000         }
7001         return;
7002
7003 error:
7004         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
7005         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7006 }
7007
7008 static void si_vce_resume(struct radeon_device *rdev)
7009 {
7010         struct radeon_ring *ring;
7011         int r;
7012
7013         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7014                 return;
7015
7016         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7017         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7018         if (r) {
7019                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7020                 return;
7021         }
7022         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7023         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7024         if (r) {
7025                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7026                 return;
7027         }
7028         r = vce_v1_0_init(rdev);
7029         if (r) {
7030                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7031                 return;
7032         }
7033 }
7034
/**
 * si_startup - bring the SI GPU and all of its rings to a running state
 *
 * @rdev: radeon_device pointer
 *
 * Ordering below matters and is hardware-mandated: PCIe link/ASPM
 * first, then VRAM scratch, MC programming (plus MC ucode when DPM
 * has not already loaded it), GART, core GPU/RLC setup, write-back,
 * fence rings, optional UVD/VCE start, interrupts, ring bring-up,
 * CP/DMA microcode and resume, and finally the IB pool, VM manager
 * and audio.
 *
 * Returns 0 on success or a negative error code; callers react to
 * failure by tearing down acceleration.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* with DPM enabled the MC microcode was already loaded earlier */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing for each ring before the rings themselves */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they only disable those engines */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* bring up the GFX, two compute and two DMA rings */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* non-fatal; they skip themselves if start failed earlier */
	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
7193
/**
 * si_resume - resume the asic after a suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM BIOS, restores golden registers,
 * resumes DPM power management when in use and reruns the full
 * startup sequence.  Returns 0 on success or a negative error code.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 * (NOTE(review): comment inherited from the r700 code path; the
	 * same reasoning is assumed to hold for SI.)
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	/* accel_working must be set before si_startup(); it is cleared
	 * again below if startup fails */
	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
7222
/**
 * si_suspend - quiesce the asic in preparation for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops power management, audio and the VM manager, halts the CP and
 * DMA engines, suspends UVD/VCE when present, tears down power and
 * clock gating, then disables interrupts, write-back and the GART.
 * Teardown order mirrors (reverses) the startup order.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
7243
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow removal of a bunch of callback functions such as
 * vram_info.
 */
/**
 * si_init - one-time asic initialization at driver load
 *
 * @rdev: radeon_device pointer
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed,
 * initializes scratch/surface registers, clocks, fences, the memory
 * controller and buffer manager, loads microcode, sets up all rings
 * and the IH ring, then runs si_startup().  A startup failure only
 * disables acceleration; a missing MC ucode is fatal.
 * Returns 0 on success or a negative error code.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message text inherited from the cayman
		 * code; this asic is SI */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode only if some piece of it is not already present */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* size the GFX, compute and DMA rings; actual init is in si_startup() */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* optional engines; failures only clear has_uvd/has_vce */
	si_uvd_init(rdev);
	si_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

#ifdef __DragonFly__
	/*
	   Some glx operations (xfce 4.14) hang on si hardware,
	   tell userland acceleration is not working properly
	*/
	rdev->accel_working = false;
#else
	rdev->accel_working = true;
#endif
	r = si_startup(rdev);
	if (r) {
		/* acceleration is disabled but modesetting still works */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
7378
/**
 * si_fini - final asic teardown at driver unload
 *
 * @rdev: radeon_device pointer
 *
 * Tears down every subsystem set up by si_init()/si_startup() in
 * reverse order of initialization, finishing with the BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7408
/**
 * si_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (SI).
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the 64-bit counter so
 * that the LSB/MSB halves can be read coherently; gpu_clock_mutex
 * serializes the capture+read sequence against concurrent callers.
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	/* latch the counter, then read both 32-bit halves */
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
7428
/**
 * si_set_uvd_clocks - reprogram the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; 0 leaves the PLL bypassed
 * @dclk: requested UVD decode clock; 0 leaves the PLL bypassed
 *
 * Routes vclk/dclk onto the bypass clock, computes the feedback and
 * post dividers via radeon_uvd_calc_upll_dividers(), then walks the
 * UPLL through its reset/settle/handshake sequence before switching
 * the clock sources back to the PLL outputs.  The register write
 * ordering follows the hardware programming sequence and must not be
 * reordered.
 *
 * Returns 0 on success or a negative error code from the divider
 * calculation or the PLL control-request handshake.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	/* 125000/250000: reference-divider limits; 16384/0x03FFFFFF:
	 * fb divider limits; 128/5: post-divider max / divider step */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7517
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * No-op when disabled via the radeon.pcie_gen2 module parameter, on
 * IGP or non-PCIE parts, or when the platform supports neither
 * 5.0GT/s nor 8.0GT/s.  For gen3-capable links not already at gen3,
 * re-runs link equalization (with hardware autonomous width disabled
 * on both the upstream bridge and the GPU), then forces a
 * software-initiated speed change and programs the target link speed
 * field of LNKCTL2.  The config-space read/modify/write ordering is
 * part of the retraining sequence and must not be reordered.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

#if 0
	if (pci_is_root_bus(rdev->pdev->bus))
		return;
#endif

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 (per the
	 * checks below) */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets on both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current HAWD bits, then disable hw
			 * autonomous width on both link partners */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate the link width up to the detected
			 * maximum, if the hw supports renegotiation */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved Enter
				 * Compliance (bit 4) and Compliance
				 * De-emphasis/Preset (bits 9-11) fields */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field of LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for hw to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7679
/**
 * si_program_aspm - program PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * No-op when ASPM is disabled via the radeon.aspm module parameter or
 * on non-PCIE parts.  Configures the N_FTS count, L0s/L1 inactivity
 * timers, PLL powerdown in L1 for both PCIE PHYs, PLL ramp-up times
 * (skipped on Oland/Hainan), dynamic lane power state, LS2 exit time,
 * and memory light-sleep.  The CLKREQ probe is compiled out here
 * (zMN_TODO is not expected to be defined), so clk_req_support is
 * forced false and the THM/MISC/CG clock-source branch below it is
 * currently dead code on this platform.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* knobs kept as variables to match the upstream structure; all
	 * fixed false here, i.e. L0s, L1 and PLL-off-in-L1 are enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
#if 0
	bool disable_clkreq = false;
#endif

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* N_FTS: fast training sequence count transmitted in L0s exit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* set up L0s/L1 inactivity timers; LC_PMI_TO_L1_DIS is cleared
	 * again below when L1 is enabled */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF
			 * and TXS2 states, on both PHY0 and PHY1 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on all lanes of
			 * both PHYs, except on Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time; Oland/Hainan need a non-zero value */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

#ifdef zMN_TODO
			/* probe the root port's CLKPM capability */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}
#else
			/* CLKREQ probing not wired up on this platform */
			clk_req_support = false;
#endif

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the timer setup computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is saturated and the link is reversed in both
		 * directions, turn the L0s inactivity timer back off */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7890
7891 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7892 {
7893         unsigned i;
7894
7895         /* make sure VCEPLL_CTLREQ is deasserted */
7896         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7897
7898         mdelay(10);
7899
7900         /* assert UPLL_CTLREQ */
7901         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7902
7903         /* wait for CTLACK and CTLACK2 to get asserted */
7904         for (i = 0; i < 100; ++i) {
7905                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7906                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7907                         break;
7908                 mdelay(10);
7909         }
7910
7911         /* deassert UPLL_CTLREQ */
7912         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7913
7914         if (i == 100) {
7915                 DRM_ERROR("Timeout setting UVD clocks!\n");
7916                 return -ETIMEDOUT;
7917         }
7918
7919         return 0;
7920 }
7921
/**
 * si_set_vce_clocks - reprogram the VCEPLL for the requested VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE encode clock; 0 bypasses and sleeps the PLL
 * @ecclk: requested VCE core clock; 0 bypasses and sleeps the PLL
 *
 * Routes evclk/ecclk onto the bypass clock, computes the feedback and
 * post dividers via radeon_uvd_calc_upll_dividers(), then walks the
 * VCEPLL (via SMC-indirect register writes) through its
 * reset/settle/handshake sequence before switching the clock sources
 * to the PLL outputs.  The register write ordering follows the
 * hardware programming sequence and must not be reordered.
 *
 * Returns 0 on success or a negative error code from the divider
 * calculation or the PLL control-request handshake.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* same divider limits as the UVD UPLL path above */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch EVCLK and ECCLK selection to the PLL outputs */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}