drm/radeon: Use release_firmware()/request_firmware()
dragonfly.git: sys/dev/drm/radeon/cik.c
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64
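/*
 * The *_UCODE_SIZE values above are in 32-bit dwords; cik_init_microcode()
 * below checks each firmware image against the corresponding size * 4 bytes.
 */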

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        u32 r;

        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
}
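/*
 * The PCIE block is reached through an index/data register pair: the target
 * offset goes into PCIE_INDEX, then PCIE_DATA is read or written.  The
 * discarded reads above read the register back to flush the posted write,
 * so the index is latched before the data access lands.
 */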

static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
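/*
 * The golden register tables in this file are {offset, and_mask, or_mask}
 * triplets consumed by radeon_program_register_sequence(): when and_mask is
 * 0xffffffff the value is written directly, otherwise the register is
 * read-modify-written as (reg & ~and_mask) | or_mask.
 */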

static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
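/*
 * The *_mgcg_cgcg_init tables above and below set up the medium-grain
 * (MGCG) and coarse-grain (CGCG) clock-gating defaults for each ASIC.
 */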

static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28355, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

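/*
 * "Spectre" and "Kalindi" are the internal names of the graphics cores in
 * Kaveri and Kabini respectively, hence the table selection in the switch
 * below.
 */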
static void cik_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_BONAIRE:
                radeon_program_register_sequence(rdev,
                                                 bonaire_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
                break;
        case CHIP_KABINI:
                radeon_program_register_sequence(rdev,
                                                 kalindi_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_KAVERI:
                radeon_program_register_sequence(rdev,
                                                 spectre_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
                break;
        default:
                break;
        }
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;

        if (rdev->flags & RADEON_IS_IGP) {
                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
                        return reference_clock / 2;
        } else {
                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
                        return reference_clock / 4;
        }
        return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
        if (offset < rdev->doorbell.size) {
                return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
                return 0;
        }
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
        if (offset < rdev->doorbell.size) {
                writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
        }
}
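/*
 * Doorbells are dword slots in a dedicated PCI aperture that rings (e.g.
 * compute and SDMA queues) use to notify the hardware of new work without
 * going through the register bus.  Both accessors above bounds-check the
 * byte offset against the aperture size mapped at init time.
 */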

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};
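/*
 * Each pair above is an { MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA }
 * tuple that ci_mc_load_microcode() programs before uploading the MC ucode.
 */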

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
                            u32 me, u32 pipe, u32 queue, u32 vmid)
{
        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
                             MEID(me & 0x3) |
                             VMID(vmid & 0xf) |
                             QUEUEID(queue & 0x7));
        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
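/*
 * Typical usage elsewhere in this driver brackets the instanced accesses:
 *
 *      cik_srbm_select(rdev, me, pipe, queue, vmid);
 *      ... program the per-instance registers ...
 *      cik_srbm_select(rdev, 0, 0, 0, 0);
 *
 * so the default instance is restored once the per-queue setup is done.
 */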

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static __unused int ci_mc_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        u32 running, blackout = 0;
        u32 *io_mc_regs;
        int i, ucode_size, regs_size;

        if (!rdev->mc_fw)
                return -EINVAL;

        switch (rdev->family) {
        case CHIP_BONAIRE:
        default:
                io_mc_regs = (u32 *)&bonaire_io_mc_regs;
                ucode_size = CIK_MC_UCODE_SIZE;
                regs_size = BONAIRE_IO_MC_REGS_SIZE;
                break;
        }

        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

        if (running == 0) {
                /*
                 * Note: running is known to be zero on this path, so the
                 * inner branch (and the matching blackout restore below)
                 * is effectively dead code.
                 */
                if (running) {
                        blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
                        WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
                }

                /* reset the engine and set to writable */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

                /* load mc io regs */
                for (i = 0; i < regs_size; i++) {
                        WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
                        WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
                }
                /* load the MC ucode */
                fw_data = (const __be32 *)rdev->mc_fw->data;
                for (i = 0; i < ucode_size; i++)
                        WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

                /* put the engine back into the active state */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

                /* wait for training to complete */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
                                break;
                        DRM_UDELAY(1);
                }
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
                                break;
                        DRM_UDELAY(1);
                }

                if (running)
                        WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
        }

        return 0;
}
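/*
 * The __unused tag above suggests this DragonFly revision does not call
 * ci_mc_load_microcode() yet; it is kept for when MC ucode upload is wired
 * into the init path.
 */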

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
                sdma_req_size;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        switch (rdev->family) {
        case CHIP_BONAIRE:
                chip_name = "BONAIRE";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = CIK_MC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KABINI:
                chip_name = "KABINI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        default: BUG();
        }

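        /*
         * DragonFly fetches these images by name ("radeonkmsfw_<CHIP>_<block>")
         * rather than by the Linux paths named in the MODULE_FIRMWARE() tags
         * above; the blobs are presumably repackaged from the same firmware.
         */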
        DRM_INFO("Loading %s Microcode\n", chip_name);

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->pfp_fw->datasize != pfp_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->datasize, fw_name);
                err = -EINVAL;
                goto out;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->me_fw->datasize != me_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->datasize, fw_name);
                err = -EINVAL;
                /*
                 * No goto here or in the size checks below: loading
                 * continues, and the non-zero err triggers the common
                 * cleanup at out:.
                 */
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
        err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->ce_fw->datasize != ce_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->ce_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
        err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->mec_fw->datasize != mec_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->mec_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
        err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->rlc_fw->datasize != rlc_req_size) {
                printk(KERN_ERR
                       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->rlc_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
        err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->sdma_fw->datasize != sdma_req_size) {
                printk(KERN_ERR
                       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
                       rdev->sdma_fw->datasize, fw_name);
                err = -EINVAL;
        }

        /* No MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
                if (err)
                        goto out;
                if (rdev->mc_fw->datasize != mc_req_size) {
                        printk(KERN_ERR
                               "cik_mc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->mc_fw->datasize, fw_name);
                        err = -EINVAL;
                }
        }

out:
        if (err) {
                if (err != -EINVAL)
                        printk(KERN_ERR
                               "cik_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                release_firmware(rdev->pfp_fw);
                rdev->pfp_fw = NULL;
                release_firmware(rdev->me_fw);
                rdev->me_fw = NULL;
                release_firmware(rdev->ce_fw);
                rdev->ce_fw = NULL;
                release_firmware(rdev->mec_fw);
                rdev->mec_fw = NULL;
                release_firmware(rdev->rlc_fw);
                rdev->rlc_fw = NULL;
                release_firmware(rdev->sdma_fw);
                rdev->sdma_fw = NULL;
                release_firmware(rdev->mc_fw);
                rdev->mc_fw = NULL;
        }
        return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
        const u32 num_tile_mode_states = 32;
        const u32 num_secondary_tile_mode_states = 16;
        u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
        u32 num_pipe_configs;
        u32 num_rbs = rdev->config.cik.max_backends_per_se *
                rdev->config.cik.max_shader_engines;

        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
                break;
        case 2:
        default:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
                break;
        case 4:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
                break;
        }

        num_pipe_configs = rdev->config.cik.max_tile_pipes;
        if (num_pipe_configs > 8)
                num_pipe_configs = 8; /* ??? */

        if (num_pipe_configs == 8) {
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                break;
                        case 1:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                break;
                        case 2:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 3:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                break;
                        case 4:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 5:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                break;
                        case 6:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 7:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 8:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                                break;
                        case 9:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                break;
                        case 10:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 11:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 12:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 13:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
                                break;
                        case 14:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 16:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 17:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 27:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
                                break;
                        case 28:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 29:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 30:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 1:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 2:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 3:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 4:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 5:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 6:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        case 8:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 9:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 10:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 11:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 12:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 13:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 14:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
        } else if (num_pipe_configs == 4) {
                if (num_rbs == 4) {
                        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                                switch (reg_offset) {
                                case 0:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                        break;
                                case 1:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                        break;
                                case 2:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 3:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                        break;
                                case 4:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 5:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                        break;
                                case 6:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 7:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 8:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16));
                                        break;
                                case 9:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                        break;
                                case 10:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 11:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 12:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 13:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
                                        break;
                                case 14:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1224                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1225                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1226                                         break;
1227                                 case 16:
1228                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1229                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1230                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1231                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1232                                         break;
1233                                 case 17:
1234                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1235                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1236                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1237                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1238                                         break;
1239                                 case 27:
1240                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1242                                         break;
1243                                 case 28:
1244                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1245                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1246                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1247                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1248                                         break;
1249                                 case 29:
1250                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1252                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1253                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1254                                         break;
1255                                 case 30:
1256                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1257                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1258                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1259                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1260                                         break;
1261                                 default:
1262                                         gb_tile_moden = 0;
1263                                         break;
1264                                 }
1265                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1266                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1267                         }
1268                 } else if (num_rbs < 4) {
1269                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1270                                 switch (reg_offset) {
1271                                 case 0:
1272                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1274                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1276                                         break;
1277                                 case 1:
1278                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1279                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1280                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1281                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1282                                         break;
1283                                 case 2:
1284                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1285                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1286                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1287                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1288                                         break;
1289                                 case 3:
1290                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1291                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1292                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1293                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1294                                         break;
1295                                 case 4:
1296                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1297                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1298                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1299                                                          TILE_SPLIT(split_equal_to_row_size));
1300                                         break;
1301                                 case 5:
1302                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1303                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1304                                         break;
1305                                 case 6:
1306                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1307                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1310                                         break;
1311                                 case 7:
1312                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1313                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315                                                          TILE_SPLIT(split_equal_to_row_size));
1316                                         break;
1317                                 case 8:
1318                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1319                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
1320                                         break;
1321                                 case 9:
1322                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1323                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1324                                         break;
1325                                 case 10:
1326                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1327                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1328                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1329                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1330                                         break;
1331                                 case 11:
1332                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1334                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1335                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1336                                         break;
1337                                 case 12:
1338                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1339                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1340                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1341                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1342                                         break;
1343                                 case 13:
1344                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1345                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1346                                         break;
1347                                 case 14:
1348                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1349                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1350                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1352                                         break;
1353                                 case 16:
1354                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1355                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1356                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1358                                         break;
1359                                 case 17:
1360                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1361                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1362                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1363                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1364                                         break;
1365                                 case 27:
1366                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1367                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1368                                         break;
1369                                 case 28:
1370                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1372                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374                                         break;
1375                                 case 29:
1376                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1377                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1378                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380                                         break;
1381                                 case 30:
1382                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1383                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1384                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1385                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1386                                         break;
1387                                 default:
1388                                         gb_tile_moden = 0;
1389                                         break;
1390                                 }
1391                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1392                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1393                         }
1394                 }
1395                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1396                         switch (reg_offset) {
1397                                 case 0:
1398                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1399                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1400                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1401                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1402                                         break;
1403                                 case 1:
1404                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1405                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1406                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1407                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1408                                         break;
1409                                 case 2:
1410                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1411                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1412                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1413                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1414                                         break;
1415                                 case 3:
1416                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1417                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1418                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1419                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1420                                         break;
1421                                 case 4:
1422                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1423                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1424                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1425                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1426                                         break;
1427                                 case 5:
1428                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1429                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1430                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1431                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1432                                         break;
1433                                 case 6:
1434                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1435                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1436                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1437                                                         NUM_BANKS(ADDR_SURF_4_BANK));
1438                                         break;
1439                                 case 8:
1440                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1441                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1442                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1443                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1444                                         break;
1445                                 case 9:
1446                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1447                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1448                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1449                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1450                                         break;
1451                                 case 10:
1452                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1453                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1454                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1455                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1456                                         break;
1457                                 case 11:
1458                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1459                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1460                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1461                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1462                                         break;
1463                                 case 12:
1464                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1465                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1466                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1467                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1468                                         break;
1469                                 case 13:
1470                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1471                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1472                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1473                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1474                                         break;
1475                                 case 14:
1476                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1477                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1478                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1479                                                         NUM_BANKS(ADDR_SURF_4_BANK));
1480                                         break;
1481                                 default:
1482                                         gb_tile_moden = 0;
1483                                         break;
1484                         }
1485                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1486                 }
1487         } else if (num_pipe_configs == 2) {
1488                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1489                         switch (reg_offset) {
1490                                 case 0:
1491                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1492                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1493                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1494                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1495                                         break;
1496                                 case 1:
1497                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1498                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1499                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1500                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1501                                         break;
1502                                 case 2:
1503                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1504                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1505                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1506                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1507                                         break;
1508                                 case 3:
1509                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1510                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1511                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1512                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1513                                         break;
1514                                 case 4:
1515                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1516                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1517                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1518                                                         TILE_SPLIT(split_equal_to_row_size));
1519                                         break;
1520                                 case 5:
1521                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1522                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1523                                         break;
1524                                 case 6:
1525                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1526                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1528                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1529                                         break;
1530                                 case 7:
1531                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1532                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1534                                                         TILE_SPLIT(split_equal_to_row_size));
1535                                         break;
1536                                 case 8:
1537                                         gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1538                                         break;
1539                                 case 9:
1540                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1541                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1542                                         break;
1543                                 case 10:
1544                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1545                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1546                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1547                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1548                                         break;
1549                                 case 11:
1550                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1551                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1552                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1553                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1554                                         break;
1555                                 case 12:
1556                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1557                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1558                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1559                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1560                                         break;
1561                                 case 13:
1562                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1563                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1564                                         break;
1565                                 case 14:
1566                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1567                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1568                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1569                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570                                         break;
1571                                 case 16:
1572                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1573                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1574                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1575                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576                                         break;
1577                                 case 17:
1578                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1579                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1580                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1581                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1582                                         break;
1583                                 case 27:
1584                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1585                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1586                                         break;
1587                                 case 28:
1588                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1589                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1590                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1591                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1592                                         break;
1593                                 case 29:
1594                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1595                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1596                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1597                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1598                                         break;
1599                                 case 30:
1600                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1601                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1602                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1603                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1604                                         break;
1605                                 default:
1606                                         gb_tile_moden = 0;
1607                                         break;
1608                         }
1609                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1610                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1611                 }
1612                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1613                         switch (reg_offset) {
1614                                 case 0:
1615                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1616                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1617                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1618                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1619                                         break;
1620                                 case 1:
1621                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1622                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1623                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1624                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1625                                         break;
1626                                 case 2:
1627                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1628                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1629                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1630                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1631                                         break;
1632                                 case 3:
1633                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1634                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1635                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1636                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1637                                         break;
1638                                 case 4:
1639                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1640                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1641                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1642                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1643                                         break;
1644                                 case 5:
1645                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1646                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1647                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1648                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1649                                         break;
1650                                 case 6:
1651                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1652                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1653                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1654                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1655                                         break;
1656                                 case 8:
1657                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1658                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1659                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1660                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1661                                         break;
1662                                 case 9:
1663                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1664                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1665                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1666                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1667                                         break;
1668                                 case 10:
1669                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1670                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1671                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1672                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1673                                         break;
1674                                 case 11:
1675                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1676                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1677                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1678                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1679                                         break;
1680                                 case 12:
1681                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1682                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1683                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1684                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1685                                         break;
1686                                 case 13:
1687                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1688                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1689                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1690                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1691                                         break;
1692                                 case 14:
1693                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1694                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1695                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1696                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1697                                         break;
1698                                 default:
1699                                         gb_tile_moden = 0;
1700                                         break;
1701                         }
1702                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1703                 }
1704         } else
1705                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1706 }
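
/*
 * Note: GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 are contiguous dword
 * arrays, hence the "+ (reg_offset * 4)" addressing above.  The gfx tile
 * modes are also cached in rdev->config.cik.tile_mode_array[] so that
 * later consumers of the tiling setup (command stream validation or the
 * info ioctl, presumably) can look them up without touching the
 * registers again.
 */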
1707
1708 /**
1709  * cik_select_se_sh - select which SE, SH to address
1710  *
1711  * @rdev: radeon_device pointer
1712  * @se_num: shader engine to address
1713  * @sh_num: sh block to address
1714  *
1715  * Select which SE, SH combinations to address. Certain
1716  * registers are instanced per SE or SH.  0xffffffff means
1717  * broadcast to all SEs or SHs (CIK).
1718  */
1719 static void cik_select_se_sh(struct radeon_device *rdev,
1720                 u32 se_num, u32 sh_num)
1721 {
1722         u32 data = INSTANCE_BROADCAST_WRITES;
1723
1724         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1725                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1726         else if (se_num == 0xffffffff)
1727                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1728         else if (sh_num == 0xffffffff)
1729                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1730         else
1731                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1732         WREG32(GRBM_GFX_INDEX, data);
1733 }
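
/*
 * Hypothetical usage sketch: to program an instanced register on SE 1
 * across all of its SH blocks, a caller would do
 *
 *   cik_select_se_sh(rdev, 1, 0xffffffff);
 *   WREG32(SOME_PER_SE_REG, value);   (SOME_PER_SE_REG is illustrative)
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *
 * restoring broadcast mode afterwards (as cik_setup_rb() below does) so
 * that unrelated register writes once again reach every instance.
 */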
1734
1735 /**
1736  * cik_create_bitmask - create a bitmask
1737  *
1738  * @bit_width: length of the mask
1739  *
1740  * create a variable length bit mask (CIK).
1741  * Returns the bitmask.
1742  */
1743 static u32 cik_create_bitmask(u32 bit_width)
1744 {
1745         u32 i, mask = 0;
1746
1747         for (i = 0; i < bit_width; i++) {
1748                 mask <<= 1;
1749                 mask |= 1;
1750         }
1751         return mask;
1752 }
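
/*
 * For bit_width in [0, 31] the loop above is equivalent to the closed
 * form ((1u << bit_width) - 1); the iterative version additionally
 * yields 0xffffffff for bit_width == 32 without invoking undefined
 * shift behavior.
 */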
1753
1754 /**
1755  * cik_get_rb_disabled - computes the mask of disabled RBs
1756  *
1757  * @rdev: radeon_device pointer
1758  * @max_rb_num: max RBs (render backends) for the asic
1759  * @se_num: number of SEs (shader engines) for the asic
1760  * @sh_per_se: number of SH blocks per SE for the asic
1761  *
1762  * Calculates the bitmask of disabled RBs (CIK).
1763  * Returns the disabled RB bitmask.
1764  */
1765 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1766                 u32 max_rb_num, u32 se_num,
1767                 u32 sh_per_se)
1768 {
1769         u32 data, mask;
1770
1771         data = RREG32(CC_RB_BACKEND_DISABLE);
1772         if (data & 1)
1773                 data &= BACKEND_DISABLE_MASK;
1774         else
1775                 data = 0;
1776         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1777
1778         data >>= BACKEND_DISABLE_SHIFT;
1779
1780         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1781
1782         return data & mask;
1783 }
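
/*
 * CC_RB_BACKEND_DISABLE reports render backends disabled in hardware
 * (the low bit apparently qualifying the field, hence the "data & 1"
 * check), while GC_USER_RB_BACKEND_DISABLE holds backends disabled by
 * software; ORing the two gives every RB this SE/SH may not use.  The
 * bitmask then trims the field to the number of RBs possible per SH.
 */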
1784
1785 /**
1786  * cik_setup_rb - setup the RBs on the asic
1787  *
1788  * @rdev: radeon_device pointer
1789  * @se_num: number of SEs (shader engines) for the asic
1790  * @sh_per_se: number of SH blocks per SE for the asic
1791  * @max_rb_num: max RBs (render backends) for the asic
1792  *
1793  * Configures per-SE/SH RB registers (CIK).
1794  */
1795 static void cik_setup_rb(struct radeon_device *rdev,
1796                 u32 se_num, u32 sh_per_se,
1797                 u32 max_rb_num)
1798 {
1799         int i, j;
1800         u32 data, mask;
1801         u32 disabled_rbs = 0;
1802         u32 enabled_rbs = 0;
1803
1804         for (i = 0; i < se_num; i++) {
1805                 for (j = 0; j < sh_per_se; j++) {
1806                         cik_select_se_sh(rdev, i, j);
1807                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1808                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1809                 }
1810         }
1811         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1812
1813         mask = 1;
1814         for (i = 0; i < max_rb_num; i++) {
1815                 if (!(disabled_rbs & mask))
1816                         enabled_rbs |= mask;
1817                 mask <<= 1;
1818         }
1819
1820         for (i = 0; i < se_num; i++) {
1821                 cik_select_se_sh(rdev, i, 0xffffffff);
1822                 data = 0;
1823                 for (j = 0; j < sh_per_se; j++) {
1824                         switch (enabled_rbs & 3) {
1825                                 case 1:
1826                                         data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1827                                         break;
1828                                 case 2:
1829                                         data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1830                                         break;
1831                                 case 3:
1832                                 default:
1833                                         data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1834                                         break;
1835                         }
1836                         enabled_rbs >>= 2;
1837                 }
1838                 WREG32(PA_SC_RASTER_CONFIG, data);
1839         }
1840         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1841 }
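
/*
 * Structure of cik_setup_rb() above: pass 1 walks every SE/SH pair and
 * packs each per-SH disabled-RB mask into disabled_rbs
 * (CIK_RB_BITMAP_WIDTH_PER_SH bits per SH); pass 2 inverts that into
 * enabled_rbs and consumes it two bits at a time, picking a
 * RASTER_CONFIG_RB_MAP_* routing value per SH for each SE's
 * PA_SC_RASTER_CONFIG.
 */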
1842
1843 /**
1844  * cik_gpu_init - setup the 3D engine
1845  *
1846  * @rdev: radeon_device pointer
1847  *
1848  * Configures the 3D engine and tiling configuration
1849  * registers so that the 3D engine is usable.
1850  */
1851 static __unused void cik_gpu_init(struct radeon_device *rdev)
1852 {
1853         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1854         u32 mc_shared_chmap, mc_arb_ramcfg;
1855         u32 hdp_host_path_cntl;
1856         u32 tmp;
1857         int i, j;
1858
1859         switch (rdev->family) {
1860                 case CHIP_BONAIRE:
1861                         rdev->config.cik.max_shader_engines = 2;
1862                         rdev->config.cik.max_tile_pipes = 4;
1863                         rdev->config.cik.max_cu_per_sh = 7;
1864                         rdev->config.cik.max_sh_per_se = 1;
1865                         rdev->config.cik.max_backends_per_se = 2;
1866                         rdev->config.cik.max_texture_channel_caches = 4;
1867                         rdev->config.cik.max_gprs = 256;
1868                         rdev->config.cik.max_gs_threads = 32;
1869                         rdev->config.cik.max_hw_contexts = 8;
1870
1871                         rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1872                         rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1873                         rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1874                         rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1875                         gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1876                         break;
1877                 case CHIP_KAVERI:
1878                         /* TODO */
1879                         break;
1880                 case CHIP_KABINI:
1881                 default:
1882                         rdev->config.cik.max_shader_engines = 1;
1883                         rdev->config.cik.max_tile_pipes = 2;
1884                         rdev->config.cik.max_cu_per_sh = 2;
1885                         rdev->config.cik.max_sh_per_se = 1;
1886                         rdev->config.cik.max_backends_per_se = 1;
1887                         rdev->config.cik.max_texture_channel_caches = 2;
1888                         rdev->config.cik.max_gprs = 256;
1889                         rdev->config.cik.max_gs_threads = 16;
1890                         rdev->config.cik.max_hw_contexts = 8;
1891
1892                         rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1893                         rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1894                         rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1895                         rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1896                         gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1897                         break;
1898         }
1899
1900         /* Initialize HDP */
1901         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1902                 WREG32((0x2c14 + j), 0x00000000);
1903                 WREG32((0x2c18 + j), 0x00000000);
1904                 WREG32((0x2c1c + j), 0x00000000);
1905                 WREG32((0x2c20 + j), 0x00000000);
1906                 WREG32((0x2c24 + j), 0x00000000);
1907         }
1908
1909         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1910
1911         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1912
1913         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1914         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1915
1916         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1917         rdev->config.cik.mem_max_burst_length_bytes = 256;
1918         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1919         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1920         if (rdev->config.cik.mem_row_size_in_kb > 4)
1921                 rdev->config.cik.mem_row_size_in_kb = 4;
1922         /* XXX use MC settings? */
1923         rdev->config.cik.shader_engine_tile_size = 32;
1924         rdev->config.cik.num_gpus = 1;
1925         rdev->config.cik.multi_gpu_tile_size = 64;
1926
1927         /* fix up row size */
1928         gb_addr_config &= ~ROW_SIZE_MASK;
1929         switch (rdev->config.cik.mem_row_size_in_kb) {
1930                 case 1:
1931                 default:
1932                         gb_addr_config |= ROW_SIZE(0);
1933                         break;
1934                 case 2:
1935                         gb_addr_config |= ROW_SIZE(1);
1936                         break;
1937                 case 4:
1938                         gb_addr_config |= ROW_SIZE(2);
1939                         break;
1940         }
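        /*
         * Worked example of the row-size math above (field value assumed
         * for illustration): a NOOFCOLS field of 1 gives
         * 4 * 2^(8+1) / 1024 = 2 KB rows, which maps to ROW_SIZE(1) here.
         */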
1941
1942         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1943          * not have bank info, so create a custom tiling dword.
1944          * bits 3:0   num_pipes
1945          * bits 7:4   num_banks
1946          * bits 11:8  group_size
1947          * bits 15:12 row_size
1948          */
1949         rdev->config.cik.tile_config = 0;
1950         switch (rdev->config.cik.num_tile_pipes) {
1951                 case 1:
1952                         rdev->config.cik.tile_config |= (0 << 0);
1953                         break;
1954                 case 2:
1955                         rdev->config.cik.tile_config |= (1 << 0);
1956                         break;
1957                 case 4:
1958                         rdev->config.cik.tile_config |= (2 << 0);
1959                         break;
1960                 case 8:
1961                 default:
1962                         /* XXX what about 12? */
1963                         rdev->config.cik.tile_config |= (3 << 0);
1964                         break;
1965         }
1966         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1967                 rdev->config.cik.tile_config |= 1 << 4;
1968         else
1969                 rdev->config.cik.tile_config |= 0 << 4;
1970         rdev->config.cik.tile_config |=
1971                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1972         rdev->config.cik.tile_config |=
1973                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
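        /*
         * Worked example of the tile_config packing above (hypothetical
         * field values): 4 pipes (pipe field 2), a nonzero NOOFBANK
         * (bank field 1), interleave field 1 and row-size field 2 yield
         * (2 << 0) | (1 << 4) | (1 << 8) | (2 << 12) = 0x2112.
         */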
1974
1975         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1976         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1977         WREG32(DMIF_ADDR_CALC, gb_addr_config);
1978         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1979         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1980         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1981         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1982         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1983
1984         cik_tiling_mode_table_init(rdev);
1985
1986         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1987                         rdev->config.cik.max_sh_per_se,
1988                         rdev->config.cik.max_backends_per_se);
1989
1990         /* set HW defaults for 3D engine */
1991         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1992
1993         WREG32(SX_DEBUG_1, 0x20);
1994
1995         WREG32(TA_CNTL_AUX, 0x00010000);
1996
1997         tmp = RREG32(SPI_CONFIG_CNTL);
1998         tmp |= 0x03000000;
1999         WREG32(SPI_CONFIG_CNTL, tmp);
2000
2001         WREG32(SQ_CONFIG, 1);
2002
2003         WREG32(DB_DEBUG, 0);
2004
2005         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2006         tmp |= 0x00000400;
2007         WREG32(DB_DEBUG2, tmp);
2008
2009         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2010         tmp |= 0x00020200;
2011         WREG32(DB_DEBUG3, tmp);
2012
2013         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2014         tmp |= 0x00018208;
2015         WREG32(CB_HW_CONTROL, tmp);
2016
2017         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2018
2019         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2020                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2021                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2022                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2023
2024         WREG32(VGT_NUM_INSTANCES, 1);
2025
2026         WREG32(CP_PERFMON_CNTL, 0);
2027
2028         WREG32(SQ_CONFIG, 0);
2029
2030         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2031                                 FORCE_EOV_MAX_REZ_CNT(255)));
2032
2033         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2034                         AUTO_INVLD_EN(ES_AND_GS_AUTO));
2035
2036         WREG32(VGT_GS_VERTEX_REUSE, 16);
2037         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2038
2039         tmp = RREG32(HDP_MISC_CNTL);
2040         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2041         WREG32(HDP_MISC_CNTL, tmp);
2042
2043         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2044         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2045
2046         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2047         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2048
2049         DRM_UDELAY(50);
2050 }
2051
2052 /*
2053  * GPU scratch registers helpers function.
2054  */
2055 /**
2056  * cik_scratch_init - setup driver info for CP scratch regs
2057  *
2058  * @rdev: radeon_device pointer
2059  *
2060  * Set up the number and offset of the CP scratch registers.
2061  * NOTE: use of CP scratch registers is a legacy interface and
2062  * is not used by default on newer asics (r6xx+).  On newer asics,
2063  * memory buffers are used for fences rather than scratch regs.
2064  */
2065 static __unused void cik_scratch_init(struct radeon_device *rdev)
2066 {
2067         int i;
2068
2069         rdev->scratch.num_reg = 7;
2070         rdev->scratch.reg_base = SCRATCH_REG0;
2071         for (i = 0; i < rdev->scratch.num_reg; i++) {
2072                 rdev->scratch.free[i] = true;
2073                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2074         }
2075 }
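
/*
 * Illustrative sketch only (an assumed helper, not part of the driver):
 * how a caller would use the scratch allocator set up above, mirroring
 * the pattern of cik_ring_test() below.
 */
static __unused int cik_scratch_usage_example(struct radeon_device *rdev)
{
	uint32_t reg;
	int r;

	r = radeon_scratch_get(rdev, &reg);	/* reserve a free scratch reg */
	if (r)
		return r;
	WREG32(reg, 0xCAFEDEAD);		/* CPU-side write over MMIO */
	r = (RREG32(reg) == 0xCAFEDEAD) ? 0 : -EINVAL;
	radeon_scratch_free(rdev, reg);		/* return it to the pool */
	return r;
}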
2076
2077 /**
2078  * cik_ring_test - basic gfx ring test
2079  *
2080  * @rdev: radeon_device pointer
2081  * @ring: radeon_ring structure holding ring information
2082  *
2083  * Allocate a scratch register and write to it using the gfx ring (CIK).
2084  * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
2086  * Returns 0 on success, error on failure.
2087  */
2088 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2089 {
2090         uint32_t scratch;
2091         uint32_t tmp = 0;
2092         unsigned i;
2093         int r;
2094
2095         r = radeon_scratch_get(rdev, &scratch);
2096         if (r) {
2097                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2098                 return r;
2099         }
2100         WREG32(scratch, 0xCAFEDEAD);
2101         r = radeon_ring_lock(rdev, ring, 3);
2102         if (r) {
2103                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2104                 radeon_scratch_free(rdev, scratch);
2105                 return r;
2106         }
2107         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2108         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2109         radeon_ring_write(ring, 0xDEADBEEF);
2110         radeon_ring_unlock_commit(rdev, ring);
2111
2112         for (i = 0; i < rdev->usec_timeout; i++) {
2113                 tmp = RREG32(scratch);
2114                 if (tmp == 0xDEADBEEF)
2115                         break;
2116                 DRM_UDELAY(1);
2117         }
2118         if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
2120         } else {
2121                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2122                           ring->idx, scratch, tmp);
2123                 r = -EINVAL;
2124         }
2125         radeon_scratch_free(rdev, scratch);
2126         return r;
2127 }
2128
2129 /**
2130  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2131  *
2132  * @rdev: radeon_device pointer
2133  * @fence: radeon fence object
2134  *
 * Emits a fence sequence number on the gfx ring and flushes
2136  * GPU caches.
2137  */
2138 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2139                              struct radeon_fence *fence)
2140 {
2141         struct radeon_ring *ring = &rdev->ring[fence->ring];
2142         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2143
2144         /* EVENT_WRITE_EOP - flush caches, send int */
2145         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2146         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2147                                  EOP_TC_ACTION_EN |
2148                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2149                                  EVENT_INDEX(5)));
2150         radeon_ring_write(ring, addr & 0xfffffffc);
2151         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2152         radeon_ring_write(ring, fence->seq);
2153         radeon_ring_write(ring, 0);
2154         /* HDP flush */
2155         /* We should be using the new WAIT_REG_MEM special op packet here
2156          * but it causes the CP to hang
2157          */
2158         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2159         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2160                                  WRITE_DATA_DST_SEL(0)));
2161         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2162         radeon_ring_write(ring, 0);
2163         radeon_ring_write(ring, 0);
2164 }
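
/*
 * Reading the EOP packet above (a sketch of the field meanings as used
 * here): DATA_SEL(1) asks the CP to write the 32-bit fence->seq to
 * `addr` once the event and the requested TC/TCL1 flushes complete,
 * and INT_SEL(2) raises an interrupt after that write lands.
 */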
2165
2166 /**
2167  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2168  *
2169  * @rdev: radeon_device pointer
2170  * @fence: radeon fence object
2171  *
 * Emits a fence sequence number on the compute ring and flushes
2173  * GPU caches.
2174  */
2175 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2176                                  struct radeon_fence *fence)
2177 {
2178         struct radeon_ring *ring = &rdev->ring[fence->ring];
2179         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2180
2181         /* RELEASE_MEM - flush caches, send int */
2182         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2183         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2184                                  EOP_TC_ACTION_EN |
2185                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2186                                  EVENT_INDEX(5)));
2187         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2188         radeon_ring_write(ring, addr & 0xfffffffc);
2189         radeon_ring_write(ring, upper_32_bits(addr));
2190         radeon_ring_write(ring, fence->seq);
2191         radeon_ring_write(ring, 0);
2192         /* HDP flush */
2193         /* We should be using the new WAIT_REG_MEM special op packet here
2194          * but it causes the CP to hang
2195          */
2196         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2197         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2198                                  WRITE_DATA_DST_SEL(0)));
2199         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2200         radeon_ring_write(ring, 0);
2201         radeon_ring_write(ring, 0);
2202 }
2203
2204 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2205                              struct radeon_ring *ring,
2206                              struct radeon_semaphore *semaphore,
2207                              bool emit_wait)
2208 {
2209         uint64_t addr = semaphore->gpu_addr;
2210         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2211
2212         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2213         radeon_ring_write(ring, addr & 0xffffffff);
2214         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2215 }
2216
2217 /*
2218  * IB stuff
2219  */
2220 /**
2221  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2222  *
2223  * @rdev: radeon_device pointer
2224  * @ib: radeon indirect buffer object
2225  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
2227  * on the gfx ring.  IBs are usually generated by userspace
2228  * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
2230  * on the gfx ring for execution by the GPU.
2231  */
2232 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2233 {
2234         struct radeon_ring *ring = &rdev->ring[ib->ring];
2235         u32 header, control = INDIRECT_BUFFER_VALID;
2236
2237         if (ib->is_const_ib) {
2238                 /* set switch buffer packet before const IB */
2239                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2240                 radeon_ring_write(ring, 0);
2241
2242                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2243         } else {
2244                 u32 next_rptr;
2245                 if (ring->rptr_save_reg) {
2246                         next_rptr = ring->wptr + 3 + 4;
2247                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2248                         radeon_ring_write(ring, ((ring->rptr_save_reg -
2249                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
2250                         radeon_ring_write(ring, next_rptr);
2251                 } else if (rdev->wb.enabled) {
2252                         next_rptr = ring->wptr + 5 + 4;
2253                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2254                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2255                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2256                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2257                         radeon_ring_write(ring, next_rptr);
2258                 }
2259
2260                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2261         }
2262
2263         control |= ib->length_dw |
2264                 (ib->vm ? (ib->vm->id << 24) : 0);
2265
2266         radeon_ring_write(ring, header);
2267         radeon_ring_write(ring,
2268 #ifdef __BIG_ENDIAN
2269                           (2 << 0) |
2270 #endif
2271                           (ib->gpu_addr & 0xFFFFFFFC));
2272         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2273         radeon_ring_write(ring, control);
2274 }
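
/*
 * For reference, the control dword assembled above packs, as the code
 * shows: the IB size in dwords in the low bits, the VM id at bit 24,
 * and the INDIRECT_BUFFER_VALID flag.
 */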
2275
2276 /**
2277  * cik_ib_test - basic gfx ring IB test
2278  *
2279  * @rdev: radeon_device pointer
2280  * @ring: radeon_ring structure holding ring information
2281  *
2282  * Allocate an IB and execute it on the gfx ring (CIK).
2283  * Provides a basic gfx ring test to verify that IBs are working.
2284  * Returns 0 on success, error on failure.
2285  */
2286 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2287 {
2288         struct radeon_ib ib;
2289         uint32_t scratch;
2290         uint32_t tmp = 0;
2291         unsigned i;
2292         int r;
2293
2294         r = radeon_scratch_get(rdev, &scratch);
2295         if (r) {
2296                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2297                 return r;
2298         }
2299         WREG32(scratch, 0xCAFEDEAD);
2300         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);	/* don't leak the scratch reg */
		return r;
	}
2305         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2306         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2307         ib.ptr[2] = 0xDEADBEEF;
2308         ib.length_dw = 3;
2309         r = radeon_ib_schedule(rdev, &ib, NULL);
2310         if (r) {
2311                 radeon_scratch_free(rdev, scratch);
2312                 radeon_ib_free(rdev, &ib);
2313                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2314                 return r;
2315         }
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);	/* free on the error path too */
		radeon_ib_free(rdev, &ib);
		return r;
	}
2321         for (i = 0; i < rdev->usec_timeout; i++) {
2322                 tmp = RREG32(scratch);
2323                 if (tmp == 0xDEADBEEF)
2324                         break;
2325                 DRM_UDELAY(1);
2326         }
2327         if (i < rdev->usec_timeout) {
2328                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2329         } else {
2330                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2331                           scratch, tmp);
2332                 r = -EINVAL;
2333         }
2334         radeon_scratch_free(rdev, scratch);
2335         radeon_ib_free(rdev, &ib);
2336         return r;
2337 }
2338
2339 /*
2340  * CP.
 * On CIK, gfx and compute now have independent command processors.
2342  *
2343  * GFX
2344  * Gfx consists of a single ring and can process both gfx jobs and
2345  * compute jobs.  The gfx CP consists of three microengines (ME):
2346  * PFP - Pre-Fetch Parser
2347  * ME - Micro Engine
2348  * CE - Constant Engine
2349  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
2351  * used by the DE so that they can be loaded into cache in parallel
2352  * while the DE is processing state update packets.
2353  *
2354  * Compute
2355  * The compute CP consists of two microengines (ME):
2356  * MEC1 - Compute MicroEngine 1
2357  * MEC2 - Compute MicroEngine 2
2358  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2359  * The queues are exposed to userspace and are programmed directly
2360  * by the compute runtime.
2361  */
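/*
 * Queue addressing example (illustrative, matching the
 * cik_srbm_select() usage later in this file):
 *
 *	cik_srbm_select(rdev, 1, 2, 5, 0);   -- MEC1, pipe 2, queue 5, vmid 0
 *
 * While selected, the banked CP_HQD_* registers refer to that queue;
 * the selection is always reset with cik_srbm_select(rdev, 0, 0, 0, 0).
 */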
2362 /**
2363  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2364  *
2365  * @rdev: radeon_device pointer
2366  * @enable: enable or disable the MEs
2367  *
2368  * Halts or unhalts the gfx MEs.
2369  */
2370 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2371 {
2372         if (enable)
2373                 WREG32(CP_ME_CNTL, 0);
2374         else {
2375                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2376                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2377         }
2378         DRM_UDELAY(50);
2379 }
2380
2381 /**
2382  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2383  *
2384  * @rdev: radeon_device pointer
2385  *
2386  * Loads the gfx PFP, ME, and CE ucode.
2387  * Returns 0 for success, -EINVAL if the ucode is not available.
2388  */
2389 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2390 {
2391         const __be32 *fw_data;
2392         int i;
2393
2394         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2395                 return -EINVAL;
2396
2397         cik_cp_gfx_enable(rdev, false);
2398
2399         /* PFP */
2400         fw_data = (const __be32 *)rdev->pfp_fw->data;
2401         WREG32(CP_PFP_UCODE_ADDR, 0);
2402         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2403                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2404         WREG32(CP_PFP_UCODE_ADDR, 0);
2405
2406         /* CE */
2407         fw_data = (const __be32 *)rdev->ce_fw->data;
2408         WREG32(CP_CE_UCODE_ADDR, 0);
2409         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2410                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2411         WREG32(CP_CE_UCODE_ADDR, 0);
2412
2413         /* ME */
2414         fw_data = (const __be32 *)rdev->me_fw->data;
2415         WREG32(CP_ME_RAM_WADDR, 0);
2416         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2417                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2418         WREG32(CP_ME_RAM_WADDR, 0);
2419
2420         WREG32(CP_PFP_UCODE_ADDR, 0);
2421         WREG32(CP_CE_UCODE_ADDR, 0);
2422         WREG32(CP_ME_RAM_WADDR, 0);
2423         WREG32(CP_ME_RAM_RADDR, 0);
2424         return 0;
2425 }
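
/*
 * Note on the load pattern above: each image is streamed one dword at a
 * time through an ADDR/DATA register pair, with the ADDR register
 * zeroed before and after to reset the auto-incrementing write index.
 * The firmware blobs are stored big-endian, hence be32_to_cpup().
 */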
2426
2427 /**
2428  * cik_cp_gfx_start - start the gfx ring
2429  *
2430  * @rdev: radeon_device pointer
2431  *
2432  * Enables the ring and loads the clear state context and other
2433  * packets required to init the ring.
2434  * Returns 0 for success, error for failure.
2435  */
2436 static int cik_cp_gfx_start(struct radeon_device *rdev)
2437 {
2438         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2439         int r, i;
2440
2441         /* init the CP */
2442         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2443         WREG32(CP_ENDIAN_SWAP, 0);
2444         WREG32(CP_DEVICE_ID, 1);
2445
2446         cik_cp_gfx_enable(rdev, true);
2447
2448         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2449         if (r) {
2450                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2451                 return r;
2452         }
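
	/*
	 * The "+ 17" above accounts for the fixed dwords emitted around
	 * the cik_default_state table: 4 (SET_BASE) + 2 + 3 (preamble +
	 * CONTEXT_CONTROL) + 2 + 2 (end preamble + CLEAR_STATE) + 4
	 * (SET_CONTEXT_REG).
	 */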
2453
2454         /* init the CE partitions.  CE only used for gfx on CIK */
2455         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2456         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2457         radeon_ring_write(ring, 0xc000);
2458         radeon_ring_write(ring, 0xc000);
2459
2460         /* setup clear context state */
2461         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2462         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2463
2464         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2465         radeon_ring_write(ring, 0x80000000);
2466         radeon_ring_write(ring, 0x80000000);
2467
2468         for (i = 0; i < cik_default_size; i++)
2469                 radeon_ring_write(ring, cik_default_state[i]);
2470
2471         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2472         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2473
2474         /* set clear context state */
2475         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2476         radeon_ring_write(ring, 0);
2477
2478         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2479         radeon_ring_write(ring, 0x00000316);
2480         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2481         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2482
2483         radeon_ring_unlock_commit(rdev, ring);
2484
2485         return 0;
2486 }
2487
2488 /**
2489  * cik_cp_gfx_fini - stop the gfx ring
2490  *
2491  * @rdev: radeon_device pointer
2492  *
2493  * Stop the gfx ring and tear down the driver ring
2494  * info.
2495  */
2496 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2497 {
2498         cik_cp_gfx_enable(rdev, false);
2499         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2500 }
2501
2502 /**
2503  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2504  *
2505  * @rdev: radeon_device pointer
2506  *
2507  * Program the location and size of the gfx ring buffer
2508  * and test it to make sure it's working.
2509  * Returns 0 for success, error for failure.
2510  */
2511 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2512 {
2513         struct radeon_ring *ring;
2514         u32 tmp;
2515         u32 rb_bufsz;
2516         u64 rb_addr;
2517         int r;
2518
2519         WREG32(CP_SEM_WAIT_TIMER, 0x0);
2520         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2521
2522         /* Set the write pointer delay */
2523         WREG32(CP_RB_WPTR_DELAY, 0);
2524
2525         /* set the RB to use vmid 0 */
2526         WREG32(CP_RB_VMID, 0);
2527
2528         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2529
2530         /* ring 0 - compute and gfx */
2531         /* Set ring buffer size */
2532         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2533         rb_bufsz = drm_order(ring->ring_size / 8);
2534         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2535 #ifdef __BIG_ENDIAN
2536         tmp |= BUF_SWAP_32BIT;
2537 #endif
2538         WREG32(CP_RB0_CNTL, tmp);
2539
2540         /* Initialize the ring buffer's read and write pointers */
2541         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2542         ring->wptr = 0;
2543         WREG32(CP_RB0_WPTR, ring->wptr);
2544
	/* set the wb address whether it's enabled or not */
2546         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2547         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2548
2549         /* scratch register shadowing is no longer supported */
2550         WREG32(SCRATCH_UMSK, 0);
2551
2552         if (!rdev->wb.enabled)
2553                 tmp |= RB_NO_UPDATE;
2554
2555         DRM_MDELAY(1);
2556         WREG32(CP_RB0_CNTL, tmp);
2557
2558         rb_addr = ring->gpu_addr >> 8;
2559         WREG32(CP_RB0_BASE, rb_addr);
2560         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2561
2562         ring->rptr = RREG32(CP_RB0_RPTR);
2563
2564         /* start the ring */
2565         cik_cp_gfx_start(rdev);
2566         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2567         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2568         if (r) {
2569                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2570                 return r;
2571         }
2572         return 0;
2573 }
2574
2575 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2576                               struct radeon_ring *ring)
2577 {
2578         u32 rptr;
2579
2582         if (rdev->wb.enabled) {
2583                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2584         } else {
2585                 spin_lock(&rdev->srbm_mutex);
2586                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2587                 rptr = RREG32(CP_HQD_PQ_RPTR);
2588                 cik_srbm_select(rdev, 0, 0, 0, 0);
2589                 spin_unlock(&rdev->srbm_mutex);
2590         }
2591         rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2592
2593         return rptr;
2594 }
2595
2596 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2597                               struct radeon_ring *ring)
2598 {
2599         u32 wptr;
2600
2601         if (rdev->wb.enabled) {
2602                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2603         } else {
2604                 spin_lock(&rdev->srbm_mutex);
2605                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2606                 wptr = RREG32(CP_HQD_PQ_WPTR);
2607                 cik_srbm_select(rdev, 0, 0, 0, 0);
2608                 spin_unlock(&rdev->srbm_mutex);
2609         }
2610         wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2611
2612         return wptr;
2613 }
2614
2615 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2616                                struct radeon_ring *ring)
2617 {
2618         u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2619
2620         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2621         WDOORBELL32(ring->doorbell_offset, wptr);
2622 }
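
/*
 * Unlike the gfx ring, whose write pointer is a plain MMIO register
 * (CP_RB0_WPTR), compute queues are kicked through a doorbell: the new
 * wptr is mirrored into the writeback page and then written to the
 * queue's doorbell offset, which the CP snoops.
 */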
2623
2624 /**
2625  * cik_cp_compute_enable - enable/disable the compute CP MEs
2626  *
2627  * @rdev: radeon_device pointer
2628  * @enable: enable or disable the MEs
2629  *
2630  * Halts or unhalts the compute MEs.
2631  */
2632 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2633 {
2634         if (enable)
2635                 WREG32(CP_MEC_CNTL, 0);
2636         else
2637                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2638         DRM_UDELAY(50);
2639 }
2640
2641 /**
2642  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2643  *
2644  * @rdev: radeon_device pointer
2645  *
2646  * Loads the compute MEC1&2 ucode.
2647  * Returns 0 for success, -EINVAL if the ucode is not available.
2648  */
2649 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2650 {
2651         const __be32 *fw_data;
2652         int i;
2653
2654         if (!rdev->mec_fw)
2655                 return -EINVAL;
2656
2657         cik_cp_compute_enable(rdev, false);
2658
2659         /* MEC1 */
2660         fw_data = (const __be32 *)rdev->mec_fw->data;
2661         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2662         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2663                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2664         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2665
2666         if (rdev->family == CHIP_KAVERI) {
2667                 /* MEC2 */
2668                 fw_data = (const __be32 *)rdev->mec_fw->data;
2669                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2670                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2671                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2672                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2673         }
2674
2675         return 0;
2676 }
2677
2678 /**
2679  * cik_cp_compute_start - start the compute queues
2680  *
2681  * @rdev: radeon_device pointer
2682  *
2683  * Enable the compute queues.
2684  * Returns 0 for success, error for failure.
2685  */
2686 static int cik_cp_compute_start(struct radeon_device *rdev)
2687 {
2688         cik_cp_compute_enable(rdev, true);
2689
2690         return 0;
2691 }
2692
2693 /**
2694  * cik_cp_compute_fini - stop the compute queues
2695  *
2696  * @rdev: radeon_device pointer
2697  *
2698  * Stop the compute queues and tear down the driver queue
2699  * info.
2700  */
2701 static void cik_cp_compute_fini(struct radeon_device *rdev)
2702 {
2703         int i, idx, r;
2704
2705         cik_cp_compute_enable(rdev, false);
2706
2707         for (i = 0; i < 2; i++) {
2708                 if (i == 0)
2709                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2710                 else
2711                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2712
2713                 if (rdev->ring[idx].mqd_obj) {
2714                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2715                         if (unlikely(r != 0))
2716                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2717
2718                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2719                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2720
2721                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2722                         rdev->ring[idx].mqd_obj = NULL;
2723                 }
2724         }
2725 }
2726
2727 static void cik_mec_fini(struct radeon_device *rdev)
2728 {
2729         int r;
2730
2731         if (rdev->mec.hpd_eop_obj) {
2732                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2733                 if (unlikely(r != 0))
2734                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2735                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2736                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2737
2738                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2739                 rdev->mec.hpd_eop_obj = NULL;
2740         }
2741 }
2742
2743 #define MEC_HPD_SIZE 2048
2744
2745 static int cik_mec_init(struct radeon_device *rdev)
2746 {
2747         int r;
2748         u32 *hpd;
2749
2750         /*
2751          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2752          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2753          */
2754         if (rdev->family == CHIP_KAVERI)
2755                 rdev->mec.num_mec = 2;
2756         else
2757                 rdev->mec.num_mec = 1;
2758         rdev->mec.num_pipe = 4;
2759         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2760
2761         if (rdev->mec.hpd_eop_obj == NULL) {
2762                 r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2764                                      PAGE_SIZE, true,
2765                                      RADEON_GEM_DOMAIN_GTT, NULL,
2766                                      &rdev->mec.hpd_eop_obj);
2767                 if (r) {
			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2769                         return r;
2770                 }
2771         }
2772
2773         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2774         if (unlikely(r != 0)) {
2775                 cik_mec_fini(rdev);
2776                 return r;
2777         }
2778         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2779                           &rdev->mec.hpd_eop_gpu_addr);
2780         if (r) {
		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2782                 cik_mec_fini(rdev);
2783                 return r;
2784         }
2785         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2786         if (r) {
		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2788                 cik_mec_fini(rdev);
2789                 return r;
2790         }
2791
2792         /* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2794
2795         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2796         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2797
2798         return 0;
2799 }
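
/*
 * Sizing example for the buffer above: on Kaveri this is
 * 2 MECs * 4 pipes * MEC_HPD_SIZE (2048) * 2 = 32 KB of GTT, one
 * MEC_HPD_SIZE * 2 slice per pipe (consumed in cik_cp_compute_resume()).
 */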
2800
2801 struct hqd_registers
2802 {
2803         u32 cp_mqd_base_addr;
2804         u32 cp_mqd_base_addr_hi;
2805         u32 cp_hqd_active;
2806         u32 cp_hqd_vmid;
2807         u32 cp_hqd_persistent_state;
2808         u32 cp_hqd_pipe_priority;
2809         u32 cp_hqd_queue_priority;
2810         u32 cp_hqd_quantum;
2811         u32 cp_hqd_pq_base;
2812         u32 cp_hqd_pq_base_hi;
2813         u32 cp_hqd_pq_rptr;
2814         u32 cp_hqd_pq_rptr_report_addr;
2815         u32 cp_hqd_pq_rptr_report_addr_hi;
2816         u32 cp_hqd_pq_wptr_poll_addr;
2817         u32 cp_hqd_pq_wptr_poll_addr_hi;
2818         u32 cp_hqd_pq_doorbell_control;
2819         u32 cp_hqd_pq_wptr;
2820         u32 cp_hqd_pq_control;
2821         u32 cp_hqd_ib_base_addr;
2822         u32 cp_hqd_ib_base_addr_hi;
2823         u32 cp_hqd_ib_rptr;
2824         u32 cp_hqd_ib_control;
2825         u32 cp_hqd_iq_timer;
2826         u32 cp_hqd_iq_rptr;
2827         u32 cp_hqd_dequeue_request;
2828         u32 cp_hqd_dma_offload;
2829         u32 cp_hqd_sema_cmd;
2830         u32 cp_hqd_msg_type;
2831         u32 cp_hqd_atomic0_preop_lo;
2832         u32 cp_hqd_atomic0_preop_hi;
2833         u32 cp_hqd_atomic1_preop_lo;
2834         u32 cp_hqd_atomic1_preop_hi;
2835         u32 cp_hqd_hq_scheduler0;
2836         u32 cp_hqd_hq_scheduler1;
2837         u32 cp_mqd_control;
2838 };
2839
2840 struct bonaire_mqd
2841 {
2842         u32 header;
2843         u32 dispatch_initiator;
2844         u32 dimensions[3];
2845         u32 start_idx[3];
2846         u32 num_threads[3];
2847         u32 pipeline_stat_enable;
2848         u32 perf_counter_enable;
2849         u32 pgm[2];
2850         u32 tba[2];
2851         u32 tma[2];
2852         u32 pgm_rsrc[2];
2853         u32 vmid;
2854         u32 resource_limits;
2855         u32 static_thread_mgmt01[2];
2856         u32 tmp_ring_size;
2857         u32 static_thread_mgmt23[2];
2858         u32 restart[3];
2859         u32 thread_trace_enable;
2860         u32 reserved1;
2861         u32 user_data[16];
2862         u32 vgtcs_invoke_count[2];
2863         struct hqd_registers queue_state;
2864         u32 dequeue_cntr;
2865         u32 interrupt_queue[64];
2866 };
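
/*
 * The MQD (memory queue descriptor) above is the in-memory image of a
 * compute queue's state: cik_cp_compute_resume() fills it in, points
 * CP_MQD_BASE_ADDR at it, and mirrors the queue_state fields into the
 * matching CP_HQD_* registers.
 */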
2867
2868 /**
2869  * cik_cp_compute_resume - setup the compute queue registers
2870  *
2871  * @rdev: radeon_device pointer
2872  *
2873  * Program the compute queues and test them to make sure they
2874  * are working.
2875  * Returns 0 for success, error for failure.
2876  */
2877 static int cik_cp_compute_resume(struct radeon_device *rdev)
2878 {
	int r, i, j, idx;
2880         u32 tmp;
2881         bool use_doorbell = true;
2882         u64 hqd_gpu_addr;
2883         u64 mqd_gpu_addr;
2884         u64 eop_gpu_addr;
2885         u64 wb_gpu_addr;
2886         u32 *buf;
2887         struct bonaire_mqd *mqd;
2888
2889         r = cik_cp_compute_start(rdev);
2890         if (r)
2891                 return r;
2892
2893         /* fix up chicken bits */
2894         tmp = RREG32(CP_CPF_DEBUG);
2895         tmp |= (1 << 23);
2896         WREG32(CP_CPF_DEBUG, tmp);
2897
2898         /* init the pipes */
2899         spin_lock(&rdev->srbm_mutex);
2900         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901                 int me = (i < 4) ? 1 : 2;
2902                 int pipe = (i < 4) ? i : (i - 4);
2903
2904                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905
2906                 cik_srbm_select(rdev, me, pipe, 0, 0);
2907
2908                 /* write the EOP addr */
2909                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911
2912                 /* set the VMID assigned */
2913                 WREG32(CP_HPD_EOP_VMID, 0);
2914
2915                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916                 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917                 tmp &= ~EOP_SIZE_MASK;
2918                 tmp |= drm_order(MEC_HPD_SIZE / 8);
2919                 WREG32(CP_HPD_EOP_CONTROL, tmp);
2920         }
2921         cik_srbm_select(rdev, 0, 0, 0, 0);
2922         spin_unlock(&rdev->srbm_mutex);
2923
2924         /* init the queues.  Just two for now. */
2925         for (i = 0; i < 2; i++) {
2926                 if (i == 0)
2927                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2928                 else
2929                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2930
2931                 if (rdev->ring[idx].mqd_obj == NULL) {
2932                         r = radeon_bo_create(rdev,
2933                                              sizeof(struct bonaire_mqd),
2934                                              PAGE_SIZE, true,
2935                                              RADEON_GEM_DOMAIN_GTT, NULL,
2936                                              &rdev->ring[idx].mqd_obj);
2937                         if (r) {
2938                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2939                                 return r;
2940                         }
2941                 }
2942
2943                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2944                 if (unlikely(r != 0)) {
2945                         cik_cp_compute_fini(rdev);
2946                         return r;
2947                 }
2948                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2949                                   &mqd_gpu_addr);
2950                 if (r) {
2951                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2952                         cik_cp_compute_fini(rdev);
2953                         return r;
2954                 }
2955                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2956                 if (r) {
2957                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2958                         cik_cp_compute_fini(rdev);
2959                         return r;
2960                 }
2961
2962                 /* doorbell offset */
2963                 rdev->ring[idx].doorbell_offset =
2964                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2965
2966                 /* init the mqd struct */
2967                 memset(buf, 0, sizeof(struct bonaire_mqd));
2968
2969                 mqd = (struct bonaire_mqd *)buf;
2970                 mqd->header = 0xC0310800;
2971                 mqd->static_thread_mgmt01[0] = 0xffffffff;
2972                 mqd->static_thread_mgmt01[1] = 0xffffffff;
2973                 mqd->static_thread_mgmt23[0] = 0xffffffff;
2974                 mqd->static_thread_mgmt23[1] = 0xffffffff;
2975
2976                 spin_lock(&rdev->srbm_mutex);
2977                 cik_srbm_select(rdev, rdev->ring[idx].me,
2978                                 rdev->ring[idx].pipe,
2979                                 rdev->ring[idx].queue, 0);
2980
2981                 /* disable wptr polling */
2982                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2983                 tmp &= ~WPTR_POLL_EN;
2984                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2985
2986                 /* enable doorbell? */
2987                 mqd->queue_state.cp_hqd_pq_doorbell_control =
2988                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2989                 if (use_doorbell)
2990                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2991                 else
2992                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2993                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2994                        mqd->queue_state.cp_hqd_pq_doorbell_control);
2995
2996                 /* disable the queue if it's active */
2997                 mqd->queue_state.cp_hqd_dequeue_request = 0;
2998                 mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
3000                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3001                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* poll with j; i indexes the outer queue loop */
			for (j = 0; j < rdev->usec_timeout; j++) {
3003                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3004                                         break;
3005                                 DRM_UDELAY(1);
3006                         }
3007                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3008                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3009                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3010                 }
3011
3012                 /* set the pointer to the MQD */
3013                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3014                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3015                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3016                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3017                 /* set MQD vmid to 0 */
3018                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3019                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3020                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3021
		/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3023                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3024                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3025                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3026                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3027                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3028
3029                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3030                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3031                 mqd->queue_state.cp_hqd_pq_control &=
3032                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3033
3034                 mqd->queue_state.cp_hqd_pq_control |=
3035                         drm_order(rdev->ring[idx].ring_size / 8);
3036                 mqd->queue_state.cp_hqd_pq_control |=
3037                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3038 #ifdef __BIG_ENDIAN
3039                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3040 #endif
3041                 mqd->queue_state.cp_hqd_pq_control &=
3042                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3043                 mqd->queue_state.cp_hqd_pq_control |=
3044                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3045                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3046
3047                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3048                 if (i == 0)
3049                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3050                 else
3051                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3052                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3053                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3054                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3055                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3056                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3057
		/* set the wb address whether it's enabled or not */
3059                 if (i == 0)
3060                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3061                 else
3062                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3063                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3064                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3065                         upper_32_bits(wb_gpu_addr) & 0xffff;
3066                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3067                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3068                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3069                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3070
3071                 /* enable the doorbell if requested */
3072                 if (use_doorbell) {
3073                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3074                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3075                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3076                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3077                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3078                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3079                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3080                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3081
3082                 } else {
3083                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3084                 }
3085                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3086                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3087
3088                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3089                 rdev->ring[idx].wptr = 0;
3090                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3091                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3092                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3093                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3094
3095                 /* set the vmid for the queue */
3096                 mqd->queue_state.cp_hqd_vmid = 0;
3097                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3098
3099                 /* activate the queue */
3100                 mqd->queue_state.cp_hqd_active = 1;
3101                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3102
3103                 cik_srbm_select(rdev, 0, 0, 0, 0);
3104                 spin_unlock(&rdev->srbm_mutex);
3105
3106                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3107                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3108
3109                 rdev->ring[idx].ready = true;
3110                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3111                 if (r)
3112                         rdev->ring[idx].ready = false;
3113         }
3114
3115         return 0;
3116 }
3117
3118 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3119 {
3120         cik_cp_gfx_enable(rdev, enable);
3121         cik_cp_compute_enable(rdev, enable);
3122 }
3123
3124 static int cik_cp_load_microcode(struct radeon_device *rdev)
3125 {
3126         int r;
3127
3128         r = cik_cp_gfx_load_microcode(rdev);
3129         if (r)
3130                 return r;
3131         r = cik_cp_compute_load_microcode(rdev);
3132         if (r)
3133                 return r;
3134
3135         return 0;
3136 }
3137
3138 static void cik_cp_fini(struct radeon_device *rdev)
3139 {
3140         cik_cp_gfx_fini(rdev);
3141         cik_cp_compute_fini(rdev);
3142 }
3143
3144 static int cik_cp_resume(struct radeon_device *rdev)
3145 {
3146         int r;
3147
3148         /* Reset all cp blocks */
3149         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3150         RREG32(GRBM_SOFT_RESET);
3151         DRM_MDELAY(15);
3152         WREG32(GRBM_SOFT_RESET, 0);
3153         RREG32(GRBM_SOFT_RESET);
3154
3155         r = cik_cp_load_microcode(rdev);
3156         if (r)
3157                 return r;
3158
3159         r = cik_cp_gfx_resume(rdev);
3160         if (r)
3161                 return r;
3162         r = cik_cp_compute_resume(rdev);
3163         if (r)
3164                 return r;
3165
3166         return 0;
3167 }
3168
3169 /*
3170  * sDMA - System DMA
3171  * Starting with CIK, the GPU has new asynchronous
3172  * DMA engines.  These engines are used for compute
3173  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3174  * and each one supports 1 ring buffer used for gfx
3175  * and 2 queues used for compute.
3176  *
3177  * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
3179  * packet format that is different from the PM4 format
3180  * used by the CP. sDMA supports copying data, writing
3181  * embedded data, solid fills, and a number of other
3182  * things.  It also has support for tiling/detiling of
3183  * buffers.
3184  */
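/*
 * sDMA packets are assembled with SDMA_PACKET(op, sub_op, extra). As an
 * illustration (taken from cik_sdma_ring_ib_execute() below), a linear
 * write of a single dword is five dwords on the ring:
 *
 *	SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *	dst address bits 31:0 (dword aligned)
 *	dst address bits 63:32
 *	number of dwords to follow (1)
 *	the payload dword
 */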
3185 /**
3186  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3187  *
3188  * @rdev: radeon_device pointer
3189  * @ib: IB object to schedule
3190  *
3191  * Schedule an IB in the DMA ring (CIK).
3192  */
3193 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3194                               struct radeon_ib *ib)
3195 {
3196         struct radeon_ring *ring = &rdev->ring[ib->ring];
3197         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3198
3199         if (rdev->wb.enabled) {
3200                 u32 next_rptr = ring->wptr + 5;
3201                 while ((next_rptr & 7) != 4)
3202                         next_rptr++;
3203                 next_rptr += 4;
3204                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3205                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3206                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3207                 radeon_ring_write(ring, 1); /* number of DWs to follow */
3208                 radeon_ring_write(ring, next_rptr);
3209         }
3210
	/* IB packet must end on an 8 DW boundary */
3212         while ((ring->wptr & 7) != 4)
3213                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3214         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3215         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3216         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3217         radeon_ring_write(ring, ib->length_dw);
3219 }
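
/*
 * Alignment note for the NOP padding above: the INDIRECT_BUFFER packet
 * is 4 dwords, so padding until (wptr & 7) == 4 makes the packet end
 * exactly on the required 8-dword boundary.
 */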
3220
3221 /**
3222  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3223  *
3224  * @rdev: radeon_device pointer
3225  * @fence: radeon fence object
3226  *
3227  * Add a DMA fence packet to the ring to write
 * the fence seq number and a DMA trap packet to generate
3229  * an interrupt if needed (CIK).
3230  */
3231 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3232                               struct radeon_fence *fence)
3233 {
3234         struct radeon_ring *ring = &rdev->ring[fence->ring];
3235         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3236         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3237                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3238         u32 ref_and_mask;
3239
3240         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3241                 ref_and_mask = SDMA0;
3242         else
3243                 ref_and_mask = SDMA1;
3244
3245         /* write the fence */
3246         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3247         radeon_ring_write(ring, addr & 0xffffffff);
3248         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3249         radeon_ring_write(ring, fence->seq);
3250         /* generate an interrupt */
3251         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3252         /* flush HDP */
3253         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3254         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3255         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3256         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3257         radeon_ring_write(ring, ref_and_mask); /* MASK */
3258         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3259 }
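
/*
 * The POLL_REG_MEM sequence above implements the HDP flush as a
 * write-then-poll (a reading of the packet as emitted here): the
 * engine's ref_and_mask bit is posted via GPU_HDP_FLUSH_REQ and
 * GPU_HDP_FLUSH_DONE is polled for equality (func 3) until the bit
 * comes back, with the retry count and poll interval in the last dword.
 */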
3260
3261 /**
3262  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3263  *
3264  * @rdev: radeon_device pointer
3265  * @ring: radeon_ring structure holding ring information
3266  * @semaphore: radeon semaphore object
3267  * @emit_wait: wait or signal semaphore
3268  *
 * Add a DMA semaphore packet to the ring to wait on or signal
3270  * other rings (CIK).
3271  */
3272 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3273                                   struct radeon_ring *ring,
3274                                   struct radeon_semaphore *semaphore,
3275                                   bool emit_wait)
3276 {
3277         u64 addr = semaphore->gpu_addr;
3278         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3279
3280         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3281         radeon_ring_write(ring, addr & 0xfffffff8);
3282         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3283 }
3284
3285 /**
3286  * cik_sdma_gfx_stop - stop the gfx async dma engines
3287  *
3288  * @rdev: radeon_device pointer
3289  *
3290  * Stop the gfx async dma ring buffers (CIK).
3291  */
3292 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3293 {
3294         u32 rb_cntl, reg_offset;
3295         int i;
3296
3297         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3298
3299         for (i = 0; i < 2; i++) {
3300                 if (i == 0)
3301                         reg_offset = SDMA0_REGISTER_OFFSET;
3302                 else
3303                         reg_offset = SDMA1_REGISTER_OFFSET;
3304                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3305                 rb_cntl &= ~SDMA_RB_ENABLE;
3306                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3307                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3308         }
3309 }
3310
3311 /**
3312  * cik_sdma_rlc_stop - stop the compute async dma engines
3313  *
3314  * @rdev: radeon_device pointer
3315  *
3316  * Stop the compute async dma queues (CIK).
3317  */
3318 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3319 {
3320         /* XXX todo */
3321 }
3322
3323 /**
 * cik_sdma_enable - halt or unhalt the async dma engines
3325  *
3326  * @rdev: radeon_device pointer
3327  * @enable: enable/disable the DMA MEs.
3328  *
3329  * Halt or unhalt the async dma engines (CIK).
3330  */
3331 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3332 {
3333         u32 me_cntl, reg_offset;
3334         int i;
3335
3336         for (i = 0; i < 2; i++) {
3337                 if (i == 0)
3338                         reg_offset = SDMA0_REGISTER_OFFSET;
3339                 else
3340                         reg_offset = SDMA1_REGISTER_OFFSET;
3341                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3342                 if (enable)
3343                         me_cntl &= ~SDMA_HALT;
3344                 else
3345                         me_cntl |= SDMA_HALT;
3346                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3347         }
3348 }
3349
3350 /**
3351  * cik_sdma_gfx_resume - setup and start the async dma engines
3352  *
3353  * @rdev: radeon_device pointer
3354  *
3355  * Set up the gfx DMA ring buffers and enable them (CIK).
3356  * Returns 0 for success, error for failure.
3357  */
3358 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3359 {
3360         struct radeon_ring *ring;
3361         u32 rb_cntl, ib_cntl;
3362         u32 rb_bufsz;
3363         u32 reg_offset, wb_offset;
3364         int i, r;
3365
3366         for (i = 0; i < 2; i++) {
3367                 if (i == 0) {
3368                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3369                         reg_offset = SDMA0_REGISTER_OFFSET;
3370                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
3371                 } else {
3372                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3373                         reg_offset = SDMA1_REGISTER_OFFSET;
3374                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3375                 }
3376
3377                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3378                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3379
3380                 /* Set ring buffer size in dwords */
3381                 rb_bufsz = drm_order(ring->ring_size / 4);
3382                 rb_cntl = rb_bufsz << 1;
3383 #ifdef __BIG_ENDIAN
3384                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3385 #endif
3386                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3387
3388                 /* Initialize the ring buffer's read and write pointers */
3389                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3390                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3391
3392                 /* set the wb address whether it's enabled or not */
3393                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3394                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3395                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3396                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3397
3398                 if (rdev->wb.enabled)
3399                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3400
3401                 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3402                 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3403
3404                 ring->wptr = 0;
3405                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3406
3407                 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3408
3409                 /* enable DMA RB */
3410                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3411
3412                 ib_cntl = SDMA_IB_ENABLE;
3413 #ifdef __BIG_ENDIAN
3414                 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3415 #endif
3416                 /* enable DMA IBs */
3417                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3418
3419                 ring->ready = true;
3420
3421                 r = radeon_ring_test(rdev, ring->idx, ring);
3422                 if (r) {
3423                         ring->ready = false;
3424                         return r;
3425                 }
3426         }
3427
3428         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3429
3430         return 0;
3431 }
3432
3433 /**
3434  * cik_sdma_rlc_resume - setup and start the async dma engines
3435  *
3436  * @rdev: radeon_device pointer
3437  *
3438  * Set up the compute DMA queues and enable them (CIK).
3439  * Returns 0 for success, error for failure.
3440  */
3441 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3442 {
3443         /* XXX todo */
3444         return 0;
3445 }
3446
3447 /**
3448  * cik_sdma_load_microcode - load the sDMA ME ucode
3449  *
3450  * @rdev: radeon_device pointer
3451  *
3452  * Loads the sDMA0/1 ucode.
3453  * Returns 0 for success, -EINVAL if the ucode is not available.
3454  */
3455 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3456 {
3457         const __be32 *fw_data;
3458         int i;
3459
3460         if (!rdev->sdma_fw)
3461                 return -EINVAL;
3462
3463         /* stop the gfx rings and rlc compute queues */
3464         cik_sdma_gfx_stop(rdev);
3465         cik_sdma_rlc_stop(rdev);
3466
3467         /* halt the MEs */
3468         cik_sdma_enable(rdev, false);
3469
3470         /* sdma0 */
3471         fw_data = (const __be32 *)rdev->sdma_fw->data;
3472         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3473         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3474                 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3475         WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3476
3477         /* sdma1 */
3478         fw_data = (const __be32 *)rdev->sdma_fw->data;
3479         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3480         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3481                 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3482         WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3483
3484         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3485         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3486         return 0;
3487 }
3488
3489 /**
3490  * cik_sdma_resume - setup and start the async dma engines
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Set up the DMA engines and enable them (CIK).
3495  * Returns 0 for success, error for failure.
3496  */
3497 static __unused int cik_sdma_resume(struct radeon_device *rdev)
3498 {
3499         int r;
3500
3501         /* Reset dma */
3502         WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3503         RREG32(SRBM_SOFT_RESET);
3504         DRM_UDELAY(50);
3505         WREG32(SRBM_SOFT_RESET, 0);
3506         RREG32(SRBM_SOFT_RESET);
3507
3508         r = cik_sdma_load_microcode(rdev);
3509         if (r)
3510                 return r;
3511
3512         /* unhalt the MEs */
3513         cik_sdma_enable(rdev, true);
3514
3515         /* start the gfx rings and rlc compute queues */
3516         r = cik_sdma_gfx_resume(rdev);
3517         if (r)
3518                 return r;
3519         r = cik_sdma_rlc_resume(rdev);
3520         if (r)
3521                 return r;
3522
3523         return 0;
3524 }
3525
3526 /**
3527  * cik_sdma_fini - tear down the async dma engines
3528  *
3529  * @rdev: radeon_device pointer
3530  *
3531  * Stop the async dma engines and free the rings (CIK).
3532  */
3533 static __unused void cik_sdma_fini(struct radeon_device *rdev)
3534 {
3535         /* stop the gfx rings and rlc compute queues */
3536         cik_sdma_gfx_stop(rdev);
3537         cik_sdma_rlc_stop(rdev);
3538         /* halt the MEs */
3539         cik_sdma_enable(rdev, false);
3540         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3541         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3542         /* XXX - compute dma queue tear down */
3543 }
3544
3545 /**
3546  * cik_copy_dma - copy pages using the DMA engine
3547  *
3548  * @rdev: radeon_device pointer
3549  * @src_offset: src GPU address
3550  * @dst_offset: dst GPU address
3551  * @num_gpu_pages: number of GPU pages to xfer
3552  * @fence: radeon fence object
3553  *
3554  * Copy GPU pages using the DMA engine (CIK).
3555  * Used by the radeon ttm implementation to move pages if
3556  * registered as the asic copy callback.
3557  */
3558 int cik_copy_dma(struct radeon_device *rdev,
3559                  uint64_t src_offset, uint64_t dst_offset,
3560                  unsigned num_gpu_pages,
3561                  struct radeon_fence **fence)
3562 {
3563         struct radeon_semaphore *sem = NULL;
3564         int ring_index = rdev->asic->copy.dma_ring_index;
3565         struct radeon_ring *ring = &rdev->ring[ring_index];
3566         u32 size_in_bytes, cur_size_in_bytes;
3567         int i, num_loops;
3568         int r = 0;
3569
3570         r = radeon_semaphore_create(rdev, &sem);
3571         if (r) {
3572                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3573                 return r;
3574         }
3575
3576         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3577         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3578         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3579         if (r) {
3580                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581                 radeon_semaphore_free(rdev, &sem, NULL);
3582                 return r;
3583         }
3584
3585         if (radeon_fence_need_sync(*fence, ring->idx)) {
3586                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3587                                             ring->idx);
3588                 radeon_fence_note_sync(*fence, ring->idx);
3589         } else {
3590                 radeon_semaphore_free(rdev, &sem, NULL);
3591         }
3592
3593         for (i = 0; i < num_loops; i++) {
3594                 cur_size_in_bytes = size_in_bytes;
3595                 if (cur_size_in_bytes > 0x1fffff)
3596                         cur_size_in_bytes = 0x1fffff;
3597                 size_in_bytes -= cur_size_in_bytes;
3598                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3599                 radeon_ring_write(ring, cur_size_in_bytes);
3600                 radeon_ring_write(ring, 0); /* src/dst endian swap */
3601                 radeon_ring_write(ring, src_offset & 0xffffffff);
3602                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3603                 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3604                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3605                 src_offset += cur_size_in_bytes;
3606                 dst_offset += cur_size_in_bytes;
3607         }
3608
3609         r = radeon_fence_emit(rdev, fence, ring->idx);
3610         if (r) {
3611                 radeon_ring_unlock_undo(rdev, ring);
3612                 return r;
3613         }
3614
3615         radeon_ring_unlock_commit(rdev, ring);
3616         radeon_semaphore_free(rdev, &sem, *fence);
3617
3618         return r;
3619 }
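
/*
 * Illustrative sketch only (not driver code): when cik_copy_dma() is
 * registered as the asic copy callback, a buffer move boils down to
 * roughly the following.  The names src_gpu_addr, dst_gpu_addr and
 * npages are placeholders; the real call is dispatched through
 * rdev->asic->copy.copy() by the radeon ttm move path.
 */
#if 0
        struct radeon_fence *fence = NULL;
        int r;

        r = cik_copy_dma(rdev, src_gpu_addr, dst_gpu_addr, npages, &fence);
        if (!r)
                r = radeon_fence_wait(fence, false); /* block until the copy lands */
        radeon_fence_unref(&fence);
#endif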
3620
3621 /**
3622  * cik_sdma_ring_test - simple async dma engine test
3623  *
3624  * @rdev: radeon_device pointer
3625  * @ring: radeon_ring structure holding ring information
3626  *
3627  * Test the DMA engine by using it to write a
3628  * value to memory (CIK).
3629  * Returns 0 for success, error for failure.
3630  */
3631 int cik_sdma_ring_test(struct radeon_device *rdev,
3632                        struct radeon_ring *ring)
3633 {
3634         unsigned i;
3635         int r;
3636         volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3637         u32 tmp;
3638
3639         if (!ptr) {
3640                 DRM_ERROR("invalid vram scratch pointer\n");
3641                 return -EINVAL;
3642         }
3643
3644         tmp = 0xCAFEDEAD;
3645         writel(tmp, ptr);
3646
3647         r = radeon_ring_lock(rdev, ring, 4);
3648         if (r) {
3649                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3650                 return r;
3651         }
3652         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3653         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3654         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3655         radeon_ring_write(ring, 1); /* number of DWs to follow */
3656         radeon_ring_write(ring, 0xDEADBEEF);
3657         radeon_ring_unlock_commit(rdev, ring);
3658
3659         for (i = 0; i < rdev->usec_timeout; i++) {
3660                 tmp = readl(ptr);
3661                 if (tmp == 0xDEADBEEF)
3662                         break;
3663                 DRM_UDELAY(1);
3664         }
3665
3666         if (i < rdev->usec_timeout) {
3667                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3668         } else {
3669                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3670                           ring->idx, tmp);
3671                 r = -EINVAL;
3672         }
3673         return r;
3674 }
3675
3676 /**
3677  * cik_sdma_ib_test - test an IB on the DMA engine
3678  *
3679  * @rdev: radeon_device pointer
3680  * @ring: radeon_ring structure holding ring information
3681  *
3682  * Test a simple IB in the DMA ring (CIK).
3683  * Returns 0 on success, error on failure.
3684  */
3685 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3686 {
3687         struct radeon_ib ib;
3688         unsigned i;
3689         int r;
3690         volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3691         u32 tmp = 0;
3692
3693         if (!ptr) {
3694                 DRM_ERROR("invalid vram scratch pointer\n");
3695                 return -EINVAL;
3696         }
3697
3698         tmp = 0xCAFEDEAD;
3699         writel(tmp, ptr);
3700
3701         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3702         if (r) {
3703                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3704                 return r;
3705         }
3706
3707         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3708         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3709         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3710         ib.ptr[3] = 1;
3711         ib.ptr[4] = 0xDEADBEEF;
3712         ib.length_dw = 5;
3713
3714         r = radeon_ib_schedule(rdev, &ib, NULL);
3715         if (r) {
3716                 radeon_ib_free(rdev, &ib);
3717                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3718                 return r;
3719         }
3720         r = radeon_fence_wait(ib.fence, false);
3721         if (r) {
3722                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3723                 return r;
3724         }
3725         for (i = 0; i < rdev->usec_timeout; i++) {
3726                 tmp = readl(ptr);
3727                 if (tmp == 0xDEADBEEF)
3728                         break;
3729                 DRM_UDELAY(1);
3730         }
3731         if (i < rdev->usec_timeout) {
3732                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3733         } else {
3734                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3735                 r = -EINVAL;
3736         }
3737         radeon_ib_free(rdev, &ib);
3738         return r;
3739 }
3740
3741
3742 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3743 {
3744         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3745                         RREG32(GRBM_STATUS));
3746         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3747                         RREG32(GRBM_STATUS2));
3748         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3749                         RREG32(GRBM_STATUS_SE0));
3750         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3751                         RREG32(GRBM_STATUS_SE1));
3752         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3753                         RREG32(GRBM_STATUS_SE2));
3754         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3755                         RREG32(GRBM_STATUS_SE3));
3756         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3757                         RREG32(SRBM_STATUS));
3758         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3759                         RREG32(SRBM_STATUS2));
3760         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3761                  RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3762         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3763                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3764         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3765         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3766                  RREG32(CP_STALLED_STAT1));
3767         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3768                  RREG32(CP_STALLED_STAT2));
3769         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3770                  RREG32(CP_STALLED_STAT3));
3771         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3772                  RREG32(CP_CPF_BUSY_STAT));
3773         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3774                  RREG32(CP_CPF_STALLED_STAT1));
3775         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3776         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3777         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3778                  RREG32(CP_CPC_STALLED_STAT1));
3779         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3780 }
3781
3782 /**
3783  * cik_gpu_check_soft_reset - check which blocks are busy
3784  *
3785  * @rdev: radeon_device pointer
3786  *
3787  * Check which blocks are busy and return the relevant reset
3788  * mask to be used by cik_gpu_soft_reset().
3789  * Returns a mask of the blocks to be reset.
3790  */
3791 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3792 {
3793         u32 reset_mask = 0;
3794         u32 tmp;
3795
3796         /* GRBM_STATUS */
3797         tmp = RREG32(GRBM_STATUS);
3798         if (tmp & (PA_BUSY | SC_BUSY |
3799                    BCI_BUSY | SX_BUSY |
3800                    TA_BUSY | VGT_BUSY |
3801                    DB_BUSY | CB_BUSY |
3802                    GDS_BUSY | SPI_BUSY |
3803                    IA_BUSY | IA_BUSY_NO_DMA))
3804                 reset_mask |= RADEON_RESET_GFX;
3805
3806         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3807                 reset_mask |= RADEON_RESET_CP;
3808
3809         /* GRBM_STATUS2 */
3810         tmp = RREG32(GRBM_STATUS2);
3811         if (tmp & RLC_BUSY)
3812                 reset_mask |= RADEON_RESET_RLC;
3813
3814         /* SDMA0_STATUS_REG */
3815         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3816         if (!(tmp & SDMA_IDLE))
3817                 reset_mask |= RADEON_RESET_DMA;
3818
3819         /* SDMA1_STATUS_REG */
3820         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3821         if (!(tmp & SDMA_IDLE))
3822                 reset_mask |= RADEON_RESET_DMA1;
3823
3824         /* SRBM_STATUS2 */
3825         tmp = RREG32(SRBM_STATUS2);
3826         if (tmp & SDMA_BUSY)
3827                 reset_mask |= RADEON_RESET_DMA;
3828
3829         if (tmp & SDMA1_BUSY)
3830                 reset_mask |= RADEON_RESET_DMA1;
3831
3832         /* SRBM_STATUS */
3833         tmp = RREG32(SRBM_STATUS);
3834
3835         if (tmp & IH_BUSY)
3836                 reset_mask |= RADEON_RESET_IH;
3837
3838         if (tmp & SEM_BUSY)
3839                 reset_mask |= RADEON_RESET_SEM;
3840
3841         if (tmp & GRBM_RQ_PENDING)
3842                 reset_mask |= RADEON_RESET_GRBM;
3843
3844         if (tmp & VMC_BUSY)
3845                 reset_mask |= RADEON_RESET_VMC;
3846
3847         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3848                    MCC_BUSY | MCD_BUSY))
3849                 reset_mask |= RADEON_RESET_MC;
3850
3851         if (evergreen_is_display_hung(rdev))
3852                 reset_mask |= RADEON_RESET_DISPLAY;
3853
3854         /* Skip MC reset as it's most likely not hung, just busy */
3855         if (reset_mask & RADEON_RESET_MC) {
3856                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3857                 reset_mask &= ~RADEON_RESET_MC;
3858         }
3859
3860         return reset_mask;
3861 }
3862
3863 /**
3864  * cik_gpu_soft_reset - soft reset GPU
3865  *
3866  * @rdev: radeon_device pointer
3867  * @reset_mask: mask of which blocks to reset
3868  *
3869  * Soft reset the blocks specified in @reset_mask.
3870  */
3871 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3872 {
3873         struct evergreen_mc_save save;
3874         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3875         u32 tmp;
3876
3877         if (reset_mask == 0)
3878                 return;
3879
3880         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3881
3882         cik_print_gpu_status_regs(rdev);
3883         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3884                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3885         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3886                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3887
3888         /* stop the rlc */
3889         cik_rlc_stop(rdev);
3890
3891         /* Disable GFX parsing/prefetching */
3892         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3893
3894         /* Disable MEC parsing/prefetching */
3895         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3896
3897         if (reset_mask & RADEON_RESET_DMA) {
3898                 /* sdma0 */
3899                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3900                 tmp |= SDMA_HALT;
3901                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3902         }
3903         if (reset_mask & RADEON_RESET_DMA1) {
3904                 /* sdma1 */
3905                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3906                 tmp |= SDMA_HALT;
3907                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3908         }
3909
3910         evergreen_mc_stop(rdev, &save);
3911         if (evergreen_mc_wait_for_idle(rdev)) {
3912                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3913         }
3914         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3915                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3916
3917         if (reset_mask & RADEON_RESET_CP) {
3918                 grbm_soft_reset |= SOFT_RESET_CP;
3919
3920                 srbm_soft_reset |= SOFT_RESET_GRBM;
3921         }
3922
3923         if (reset_mask & RADEON_RESET_DMA)
3924                 srbm_soft_reset |= SOFT_RESET_SDMA;
3925
3926         if (reset_mask & RADEON_RESET_DMA1)
3927                 srbm_soft_reset |= SOFT_RESET_SDMA1;
3928
3929         if (reset_mask & RADEON_RESET_DISPLAY)
3930                 srbm_soft_reset |= SOFT_RESET_DC;
3931
3932         if (reset_mask & RADEON_RESET_RLC)
3933                 grbm_soft_reset |= SOFT_RESET_RLC;
3934
3935         if (reset_mask & RADEON_RESET_SEM)
3936                 srbm_soft_reset |= SOFT_RESET_SEM;
3937
3938         if (reset_mask & RADEON_RESET_IH)
3939                 srbm_soft_reset |= SOFT_RESET_IH;
3940
3941         if (reset_mask & RADEON_RESET_GRBM)
3942                 srbm_soft_reset |= SOFT_RESET_GRBM;
3943
3944         if (reset_mask & RADEON_RESET_VMC)
3945                 srbm_soft_reset |= SOFT_RESET_VMC;
3946
3947         if (!(rdev->flags & RADEON_IS_IGP)) {
3948                 if (reset_mask & RADEON_RESET_MC)
3949                         srbm_soft_reset |= SOFT_RESET_MC;
3950         }
3951
3952         if (grbm_soft_reset) {
3953                 tmp = RREG32(GRBM_SOFT_RESET);
3954                 tmp |= grbm_soft_reset;
3955                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3956                 WREG32(GRBM_SOFT_RESET, tmp);
3957                 tmp = RREG32(GRBM_SOFT_RESET);
3958
3959                 DRM_UDELAY(50);
3960
3961                 tmp &= ~grbm_soft_reset;
3962                 WREG32(GRBM_SOFT_RESET, tmp);
3963                 tmp = RREG32(GRBM_SOFT_RESET);
3964         }
3965
3966         if (srbm_soft_reset) {
3967                 tmp = RREG32(SRBM_SOFT_RESET);
3968                 tmp |= srbm_soft_reset;
3969                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3970                 WREG32(SRBM_SOFT_RESET, tmp);
3971                 tmp = RREG32(SRBM_SOFT_RESET);
3972
3973                 DRM_UDELAY(50);
3974
3975                 tmp &= ~srbm_soft_reset;
3976                 WREG32(SRBM_SOFT_RESET, tmp);
3977                 tmp = RREG32(SRBM_SOFT_RESET);
3978         }
3979
3980         /* Wait a little for things to settle down */
3981         DRM_UDELAY(50);
3982
3983         evergreen_mc_resume(rdev, &save);
3984         DRM_UDELAY(50);
3985
3986         cik_print_gpu_status_regs(rdev);
3987 }
3988
3989 /**
3990  * cik_asic_reset - soft reset GPU
3991  *
3992  * @rdev: radeon_device pointer
3993  *
3994  * Look up which blocks are hung and attempt
3995  * to reset them.
3996  * Returns 0 for success.
3997  */
3998 int cik_asic_reset(struct radeon_device *rdev)
3999 {
4000         u32 reset_mask;
4001
4002         reset_mask = cik_gpu_check_soft_reset(rdev);
4003
4004         if (reset_mask)
4005                 r600_set_bios_scratch_engine_hung(rdev, true);
4006
4007         cik_gpu_soft_reset(rdev, reset_mask);
4008
4009         reset_mask = cik_gpu_check_soft_reset(rdev);
4010
4011         if (!reset_mask)
4012                 r600_set_bios_scratch_engine_hung(rdev, false);
4013
4014         return 0;
4015 }
4016
4017 /**
4018  * cik_gfx_is_lockup - check if the 3D engine is locked up
4019  *
4020  * @rdev: radeon_device pointer
4021  * @ring: radeon_ring structure holding ring information
4022  *
4023  * Check if the 3D engine is locked up (CIK).
4024  * Returns true if the engine is locked, false if not.
4025  */
4026 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4027 {
4028         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4029
4030         if (!(reset_mask & (RADEON_RESET_GFX |
4031                             RADEON_RESET_COMPUTE |
4032                             RADEON_RESET_CP))) {
4033                 radeon_ring_lockup_update(ring);
4034                 return false;
4035         }
4036         /* force CP activities */
4037         radeon_ring_force_activity(rdev, ring);
4038         return radeon_ring_test_lockup(rdev, ring);
4039 }
4040
4041 /**
4042  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4043  *
4044  * @rdev: radeon_device pointer
4045  * @ring: radeon_ring structure holding ring information
4046  *
4047  * Check if the async DMA engine is locked up (CIK).
4048  * Returns true if the engine appears to be locked up, false if not.
4049  */
4050 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4051 {
4052         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4053         u32 mask;
4054
4055         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4056                 mask = RADEON_RESET_DMA;
4057         else
4058                 mask = RADEON_RESET_DMA1;
4059
4060         if (!(reset_mask & mask)) {
4061                 radeon_ring_lockup_update(ring);
4062                 return false;
4063         }
4064         /* force ring activities */
4065         radeon_ring_force_activity(rdev, ring);
4066         return radeon_ring_test_lockup(rdev, ring);
4067 }
4068
4069 /* MC */
4070 /**
4071  * cik_mc_program - program the GPU memory controller
4072  *
4073  * @rdev: radeon_device pointer
4074  *
4075  * Set the location of vram, gart, and AGP in the GPU's
4076  * physical address space (CIK).
4077  */
4078 static __unused void cik_mc_program(struct radeon_device *rdev)
4079 {
4080         struct evergreen_mc_save save;
4081         u32 tmp;
4082         int i, j;
4083
4084         /* Initialize HDP */
4085         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4086                 WREG32((0x2c14 + j), 0x00000000);
4087                 WREG32((0x2c18 + j), 0x00000000);
4088                 WREG32((0x2c1c + j), 0x00000000);
4089                 WREG32((0x2c20 + j), 0x00000000);
4090                 WREG32((0x2c24 + j), 0x00000000);
4091         }
4092         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4093
4094         evergreen_mc_stop(rdev, &save);
4095         if (radeon_mc_wait_for_idle(rdev)) {
4096                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4097         }
4098         /* Lockout access through VGA aperture */
4099         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4100         /* Update configuration */
4101         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4102                         rdev->mc.vram_start >> 12);
4103         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4104                         rdev->mc.vram_end >> 12);
4105         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4106                         rdev->vram_scratch.gpu_addr >> 12);
4107         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4108         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4109         WREG32(MC_VM_FB_LOCATION, tmp);
4110         /* XXX double check these! */
4111         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4112         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4113         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4114         WREG32(MC_VM_AGP_BASE, 0);
4115         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4116         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4117         if (radeon_mc_wait_for_idle(rdev)) {
4118                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4119         }
4120         evergreen_mc_resume(rdev, &save);
4121         /* we need to own VRAM, so turn off the VGA renderer here
4122          * to stop it overwriting our objects */
4123         rv515_vga_render_disable(rdev);
4124 }
4125
4126 /**
4127  * cik_mc_init - initialize the memory controller driver params
4128  *
4129  * @rdev: radeon_device pointer
4130  *
4131  * Look up the amount of vram, vram width, and decide how to place
4132  * vram and gart within the GPU's physical address space (CIK).
4133  * Returns 0 for success.
4134  */
4135 static __unused int cik_mc_init(struct radeon_device *rdev)
4136 {
4137         u32 tmp;
4138         int chansize, numchan;
4139
4140         /* Get VRAM information */
4141         rdev->mc.vram_is_ddr = true;
4142         tmp = RREG32(MC_ARB_RAMCFG);
4143         if (tmp & CHANSIZE_MASK) {
4144                 chansize = 64;
4145         } else {
4146                 chansize = 32;
4147         }
4148         tmp = RREG32(MC_SHARED_CHMAP);
4149         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4150         case 0:
4151         default:
4152                 numchan = 1;
4153                 break;
4154         case 1:
4155                 numchan = 2;
4156                 break;
4157         case 2:
4158                 numchan = 4;
4159                 break;
4160         case 3:
4161                 numchan = 8;
4162                 break;
4163         case 4:
4164                 numchan = 3;
4165                 break;
4166         case 5:
4167                 numchan = 6;
4168                 break;
4169         case 6:
4170                 numchan = 10;
4171                 break;
4172         case 7:
4173                 numchan = 12;
4174                 break;
4175         case 8:
4176                 numchan = 16;
4177                 break;
4178         }
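        /* e.g. 4 channels x 64-bit chansize -> a 256-bit effective bus */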
4179         rdev->mc.vram_width = numchan * chansize;
4180         /* Could aper size report 0? */
4181         rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4182         rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4183         /* size in MB on CIK */
4184         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4185         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4186         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4187         si_vram_gtt_location(rdev, &rdev->mc);
4188         radeon_update_bandwidth_info(rdev);
4189
4190         return 0;
4191 }
4192
4193 /*
4194  * GART
4195  * VMID 0 is the physical GPU addresses as used by the kernel.
4196  * VMIDs 1-15 are used for userspace clients and are handled
4197  * by the radeon vm/hsa code.
4198  */
4199 /**
4200  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4201  *
4202  * @rdev: radeon_device pointer
4203  *
4204  * Flush the TLB for the VMID 0 page table (CIK).
4205  */
4206 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4207 {
4208         /* flush hdp cache */
4209         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4210
4211         /* bits 0-15 are the VM contexts0-15 */
4212         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4213 }
4214
4215 /**
4216  * cik_pcie_gart_enable - gart enable
4217  *
4218  * @rdev: radeon_device pointer
4219  *
4220  * This sets up the TLBs, programs the page tables for VMID0,
4221  * sets up the hw for VMIDs 1-15 which are allocated on
4222  * demand, and sets up the global locations for the LDS, GDS,
4223  * and GPUVM for FSA64 clients (CIK).
4224  * Returns 0 for success, errors for failure.
4225  */
4226 static __unused int cik_pcie_gart_enable(struct radeon_device *rdev)
4227 {
4228         int r, i;
4229
4230         if (rdev->gart.robj == NULL) {
4231                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4232                 return -EINVAL;
4233         }
4234         r = radeon_gart_table_vram_pin(rdev);
4235         if (r)
4236                 return r;
4237         radeon_gart_restore(rdev);
4238         /* Setup TLB control */
4239         WREG32(MC_VM_MX_L1_TLB_CNTL,
4240                (0xA << 7) |
4241                ENABLE_L1_TLB |
4242                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4243                ENABLE_ADVANCED_DRIVER_MODEL |
4244                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4245         /* Setup L2 cache */
4246         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4247                ENABLE_L2_FRAGMENT_PROCESSING |
4248                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4249                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4250                EFFECTIVE_L2_QUEUE_SIZE(7) |
4251                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4252         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4253         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4254                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4255         /* setup context0 */
4256         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4257         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4258         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4259         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4260                         (u32)(rdev->dummy_page.addr >> 12));
4261         WREG32(VM_CONTEXT0_CNTL2, 0);
4262         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4263                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4264
4265         WREG32(0x15D4, 0);
4266         WREG32(0x15D8, 0);
4267         WREG32(0x15DC, 0);
4268
4269         /* empty context1-15 */
4270         /* FIXME start with 4G, once using 2 level pt switch to full
4271          * vm size space
4272          */
4273         /* set vm size, must be a multiple of 4 */
4274         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4275         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4276         for (i = 1; i < 16; i++) {
4277                 if (i < 8)
4278                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4279                                rdev->gart.table_addr >> 12);
4280                 else
4281                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4282                                rdev->gart.table_addr >> 12);
4283         }
4284
4285         /* enable context1-15 */
4286         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4287                (u32)(rdev->dummy_page.addr >> 12));
4288         WREG32(VM_CONTEXT1_CNTL2, 4);
4289         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4290                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4292                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4294                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4296                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4298                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4300                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4301                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4302
4303         /* TC cache setup ??? */
4304         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4305         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4306         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4307
4308         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4309         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4310         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4311         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4312         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4313
4314         WREG32(TC_CFG_L1_VOLATILE, 0);
4315         WREG32(TC_CFG_L2_VOLATILE, 0);
4316
4317         if (rdev->family == CHIP_KAVERI) {
4318                 u32 tmp = RREG32(CHUB_CONTROL);
4319                 tmp &= ~BYPASS_VM;
4320                 WREG32(CHUB_CONTROL, tmp);
4321         }
4322
4323         /* XXX SH_MEM regs */
4324         /* where to put LDS, scratch, GPUVM in FSA64 space */
4325         spin_lock(&rdev->srbm_mutex);
4326         for (i = 0; i < 16; i++) {
4327                 cik_srbm_select(rdev, 0, 0, 0, i);
4328                 /* CP and shaders */
4329                 WREG32(SH_MEM_CONFIG, 0);
4330                 WREG32(SH_MEM_APE1_BASE, 1);
4331                 WREG32(SH_MEM_APE1_LIMIT, 0);
4332                 WREG32(SH_MEM_BASES, 0);
4333                 /* SDMA GFX */
4334                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4335                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4336                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4337                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4338                 /* XXX SDMA RLC - todo */
4339         }
4340         cik_srbm_select(rdev, 0, 0, 0, 0);
4341         spin_unlock(&rdev->srbm_mutex);
4342
4343         cik_pcie_gart_tlb_flush(rdev);
4344         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4345                  (unsigned)(rdev->mc.gtt_size >> 20),
4346                  (unsigned long long)rdev->gart.table_addr);
4347         rdev->gart.ready = true;
4348         return 0;
4349 }
4350
4351 /**
4352  * cik_pcie_gart_disable - gart disable
4353  *
4354  * @rdev: radeon_device pointer
4355  *
4356  * This disables all VM page tables (CIK).
4357  */
4358 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4359 {
4360         /* Disable all tables */
4361         WREG32(VM_CONTEXT0_CNTL, 0);
4362         WREG32(VM_CONTEXT1_CNTL, 0);
4363         /* Setup TLB control */
4364         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4365                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4366         /* Setup L2 cache */
4367         WREG32(VM_L2_CNTL,
4368                ENABLE_L2_FRAGMENT_PROCESSING |
4369                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4370                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4371                EFFECTIVE_L2_QUEUE_SIZE(7) |
4372                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4373         WREG32(VM_L2_CNTL2, 0);
4374         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4375                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4376         radeon_gart_table_vram_unpin(rdev);
4377 }
4378
4379 /**
4380  * cik_pcie_gart_fini - vm fini callback
4381  *
4382  * @rdev: radeon_device pointer
4383  *
4384  * Tears down the driver GART/VM setup (CIK).
4385  */
4386 static __unused void cik_pcie_gart_fini(struct radeon_device *rdev)
4387 {
4388         cik_pcie_gart_disable(rdev);
4389         radeon_gart_table_vram_free(rdev);
4390         radeon_gart_fini(rdev);
4391 }
4392
4393 /* vm parser */
4394 /**
4395  * cik_ib_parse - vm ib_parse callback
4396  *
4397  * @rdev: radeon_device pointer
4398  * @ib: indirect buffer pointer
4399  *
4400  * CIK uses hw IB checking so this is a nop (CIK).
4401  */
4402 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4403 {
4404         return 0;
4405 }
4406
4407 /*
4408  * vm
4409  * VMID 0 is the physical GPU addresses as used by the kernel.
4410  * VMIDs 1-15 are used for userspace clients and are handled
4411  * by the radeon vm/hsa code.
4412  */
4413 /**
4414  * cik_vm_init - cik vm init callback
4415  *
4416  * @rdev: radeon_device pointer
4417  *
4418  * Inits cik specific vm parameters (number of VMs, base of vram for
4419  * VMIDs 1-15) (CIK).
4420  * Returns 0 for success.
4421  */
4422 int cik_vm_init(struct radeon_device *rdev)
4423 {
4424         /* number of VMs */
4425         rdev->vm_manager.nvm = 16;
4426         /* base offset of vram pages */
4427         if (rdev->flags & RADEON_IS_IGP) {
4428                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4429                 tmp <<= 22;
4430                 rdev->vm_manager.vram_base_offset = tmp;
4431         } else
4432                 rdev->vm_manager.vram_base_offset = 0;
4433
4434         return 0;
4435 }
4436
4437 /**
4438  * cik_vm_fini - cik vm fini callback
4439  *
4440  * @rdev: radeon_device pointer
4441  *
4442  * Tear down any asic specific VM setup (CIK).
4443  */
4444 void cik_vm_fini(struct radeon_device *rdev)
4445 {
4446 }
4447
4448 /**
4449  * cik_vm_decode_fault - print human readable fault info
4450  *
4451  * @rdev: radeon_device pointer
4452  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4453  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4454  * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4455  * Print human readable fault information (CIK).
4456  */
4457 static void cik_vm_decode_fault(struct radeon_device *rdev,
4458                                 u32 status, u32 addr, u32 mc_client)
4459 {
4460         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4461         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4462         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4463         char *block = (char *)&mc_client;
4464
4465         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4466                protections, vmid, addr,
4467                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4468                block, mc_id);
4469 }
4470
4471 /**
4472  * cik_vm_flush - cik vm flush using the CP
4473  *
4474  * @rdev: radeon_device pointer
4475  *
4476  * Update the page table base and flush the VM TLB
4477  * using the CP (CIK).
4478  */
4479 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4480 {
4481         struct radeon_ring *ring = &rdev->ring[ridx];
4482
4483         if (vm == NULL)
4484                 return;
4485
4486         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4487         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4488                                  WRITE_DATA_DST_SEL(0)));
4489         if (vm->id < 8) {
4490                 radeon_ring_write(ring,
4491                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4492         } else {
4493                 radeon_ring_write(ring,
4494                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4495         }
4496         radeon_ring_write(ring, 0);
4497         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4498
4499         /* update SH_MEM_* regs */
4500         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4501         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4502                                  WRITE_DATA_DST_SEL(0)));
4503         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4504         radeon_ring_write(ring, 0);
4505         radeon_ring_write(ring, VMID(vm->id));
4506
4507         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4508         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4509                                  WRITE_DATA_DST_SEL(0)));
4510         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4511         radeon_ring_write(ring, 0);
4512
4513         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4514         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4515         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4516         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4517
4518         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4519         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4520                                  WRITE_DATA_DST_SEL(0)));
4521         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4522         radeon_ring_write(ring, 0);
4523         radeon_ring_write(ring, VMID(0));
4524
4525         /* HDP flush */
4526         /* We should be using the WAIT_REG_MEM packet here like in
4527          * cik_fence_ring_emit(), but it causes the CP to hang in this
4528          * context...
4529          */
4530         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4531         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4532                                  WRITE_DATA_DST_SEL(0)));
4533         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4534         radeon_ring_write(ring, 0);
4535         radeon_ring_write(ring, 0);
4536
4537         /* bits 0-15 are the VM contexts0-15 */
4538         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4539         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4540                                  WRITE_DATA_DST_SEL(0)));
4541         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4542         radeon_ring_write(ring, 0);
4543         radeon_ring_write(ring, 1 << vm->id);
4544
4545         /* compute doesn't have PFP */
4546         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4547                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4548                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4549                 radeon_ring_write(ring, 0x0);
4550         }
4551 }
4552
4553 /**
4554  * cik_vm_set_page - update the page tables using CP or sDMA
4555  *
4556  * @rdev: radeon_device pointer
4557  * @ib: indirect buffer to fill with commands
4558  * @pe: addr of the page entry
4559  * @addr: dst addr to write into pe
4560  * @count: number of page entries to update
4561  * @incr: increase next addr by incr bytes
4562  * @flags: access flags
4563  *
4564  * Update the page tables using CP or sDMA (CIK).
4565  */
4566 void cik_vm_set_page(struct radeon_device *rdev,
4567                      struct radeon_ib *ib,
4568                      uint64_t pe,
4569                      uint64_t addr, unsigned count,
4570                      uint32_t incr, uint32_t flags)
4571 {
4572         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4573         uint64_t value;
4574         unsigned ndw;
4575
4576         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4577                 /* CP */
4578                 while (count) {
4579                         ndw = 2 + count * 2;
4580                         if (ndw > 0x3FFE)
4581                                 ndw = 0x3FFE;
4582
4583                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4584                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4585                                                     WRITE_DATA_DST_SEL(1));
4586                         ib->ptr[ib->length_dw++] = pe;
4587                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4588                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4589                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4590                                         value = radeon_vm_map_gart(rdev, addr);
4591                                         value &= 0xFFFFFFFFFFFFF000ULL;
4592                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4593                                         value = addr;
4594                                 } else {
4595                                         value = 0;
4596                                 }
4597                                 addr += incr;
4598                                 value |= r600_flags;
4599                                 ib->ptr[ib->length_dw++] = value;
4600                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4601                         }
4602                 }
4603         } else {
4604                 /* DMA */
4605                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4606                         while (count) {
4607                                 ndw = count * 2;
4608                                 if (ndw > 0xFFFFE)
4609                                         ndw = 0xFFFFE;
4610
4611                                 /* for non-physically contiguous pages (system) */
4612                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4613                                 ib->ptr[ib->length_dw++] = pe;
4614                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4615                                 ib->ptr[ib->length_dw++] = ndw;
4616                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4617                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4618                                                 value = radeon_vm_map_gart(rdev, addr);
4619                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4620                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4621                                                 value = addr;
4622                                         } else {
4623                                                 value = 0;
4624                                         }
4625                                         addr += incr;
4626                                         value |= r600_flags;
4627                                         ib->ptr[ib->length_dw++] = value;
4628                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4629                                 }
4630                         }
4631                 } else {
4632                         while (count) {
4633                                 ndw = count;
4634                                 if (ndw > 0x7FFFF)
4635                                         ndw = 0x7FFFF;
4636
4637                                 if (flags & RADEON_VM_PAGE_VALID)
4638                                         value = addr;
4639                                 else
4640                                         value = 0;
4641                                 /* for physically contiguous pages (vram) */
4642                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4643                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4644                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4645                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4646                                 ib->ptr[ib->length_dw++] = 0;
4647                                 ib->ptr[ib->length_dw++] = value; /* value */
4648                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4649                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4650                                 ib->ptr[ib->length_dw++] = 0;
4651                                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4652                                 pe += ndw * 8;
4653                                 addr += ndw * incr;
4654                                 count -= ndw;
4655                         }
4656                 }
4657                 while (ib->length_dw & 0x7)
4658                         ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4659         }
4660 }
4661
4662 /**
4663  * cik_dma_vm_flush - cik vm flush using sDMA
4664  *
4665  * @rdev: radeon_device pointer
4666  *
4667  * Update the page table base and flush the VM TLB
4668  * using sDMA (CIK).
4669  */
4670 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4671 {
4672         struct radeon_ring *ring = &rdev->ring[ridx];
4673         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4674                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4675         u32 ref_and_mask;
4676
4677         if (vm == NULL)
4678                 return;
4679
4680         if (ridx == R600_RING_TYPE_DMA_INDEX)
4681                 ref_and_mask = SDMA0;
4682         else
4683                 ref_and_mask = SDMA1;
4684
4685         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4686         if (vm->id < 8) {
4687                 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4688         } else {
4689                 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4690         }
4691         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4692
4693         /* update SH_MEM_* regs */
4694         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4696         radeon_ring_write(ring, VMID(vm->id));
4697
4698         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4700         radeon_ring_write(ring, 0);
4701
4702         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4703         radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4704         radeon_ring_write(ring, 0);
4705
4706         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4707         radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4708         radeon_ring_write(ring, 1);
4709
4710         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4711         radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4712         radeon_ring_write(ring, 0);
4713
4714         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4715         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4716         radeon_ring_write(ring, VMID(0));
4717
4718         /* flush HDP */
4719         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4720         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4721         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4722         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4723         radeon_ring_write(ring, ref_and_mask); /* MASK */
4724         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4725
4726         /* flush TLB */
4727         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4728         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4729         radeon_ring_write(ring, 1 << vm->id);
4730 }
4731
4732 /*
4733  * RLC
4734  * The RLC is a multi-purpose microengine that handles a
4735  * variety of functions, the most important of which is
4736  * the interrupt controller.
4737  */
4738 /**
4739  * cik_rlc_stop - stop the RLC ME
4740  *
4741  * @rdev: radeon_device pointer
4742  *
4743  * Halt the RLC ME (MicroEngine) (CIK).
4744  */
4745 static void cik_rlc_stop(struct radeon_device *rdev)
4746 {
4747         int i, j, k;
4748         u32 mask, tmp;
4749
4750         tmp = RREG32(CP_INT_CNTL_RING0);
4751         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4752         WREG32(CP_INT_CNTL_RING0, tmp);
4753
4754         RREG32(CB_CGTT_SCLK_CTRL);
4755         RREG32(CB_CGTT_SCLK_CTRL);
4756         RREG32(CB_CGTT_SCLK_CTRL);
4757         RREG32(CB_CGTT_SCLK_CTRL);
4758
4759         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4760         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4761
4762         WREG32(RLC_CNTL, 0);
4763
4764         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4765                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4766                         cik_select_se_sh(rdev, i, j);
4767                         for (k = 0; k < rdev->usec_timeout; k++) {
4768                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4769                                         break;
4770                                 DRM_UDELAY(1);
4771                         }
4772                 }
4773         }
4774         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4775
4776         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4777         for (k = 0; k < rdev->usec_timeout; k++) {
4778                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4779                         break;
4780                 DRM_UDELAY(1);
4781         }
4782 }
4783
4784 /**
4785  * cik_rlc_start - start the RLC ME
4786  *
4787  * @rdev: radeon_device pointer
4788  *
4789  * Unhalt the RLC ME (MicroEngine) (CIK).
4790  */
4791 static void cik_rlc_start(struct radeon_device *rdev)
4792 {
4793         u32 tmp;
4794
4795         WREG32(RLC_CNTL, RLC_ENABLE);
4796
4797         tmp = RREG32(CP_INT_CNTL_RING0);
4798         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4799         WREG32(CP_INT_CNTL_RING0, tmp);
4800
4801         DRM_UDELAY(50);
4802 }
4803
4804 /**
4805  * cik_rlc_resume - setup the RLC hw
4806  *
4807  * @rdev: radeon_device pointer
4808  *
4809  * Initialize the RLC registers, load the ucode,
4810  * and start the RLC (CIK).
4811  * Returns 0 for success, -EINVAL if the ucode is not available.
4812  */
4813 static __unused int cik_rlc_resume(struct radeon_device *rdev)
4814 {
4815         u32 i, size;
4816         u32 clear_state_info[3];
4817         const __be32 *fw_data;
4818
4819         if (!rdev->rlc_fw)
4820                 return -EINVAL;
4821
4822         switch (rdev->family) {
4823         case CHIP_BONAIRE:
4824         default:
4825                 size = BONAIRE_RLC_UCODE_SIZE;
4826                 break;
4827         case CHIP_KAVERI:
4828                 size = KV_RLC_UCODE_SIZE;
4829                 break;
4830         case CHIP_KABINI:
4831                 size = KB_RLC_UCODE_SIZE;
4832                 break;
4833         }
4834
4835         cik_rlc_stop(rdev);
4836
4837         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4838         RREG32(GRBM_SOFT_RESET);
4839         DRM_UDELAY(50);
4840         WREG32(GRBM_SOFT_RESET, 0);
4841         RREG32(GRBM_SOFT_RESET);
4842         DRM_UDELAY(50);
4843
4844         WREG32(RLC_LB_CNTR_INIT, 0);
4845         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4846
4847         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4848         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4849         WREG32(RLC_LB_PARAMS, 0x00600408);
4850         WREG32(RLC_LB_CNTL, 0x80000004);
4851
4852         WREG32(RLC_MC_CNTL, 0);
4853         WREG32(RLC_UCODE_CNTL, 0);
4854
4855         fw_data = (const __be32 *)rdev->rlc_fw->data;
4856         WREG32(RLC_GPM_UCODE_ADDR, 0);
4857         for (i = 0; i < size; i++)
4858                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4859         WREG32(RLC_GPM_UCODE_ADDR, 0);
4860
4861         /* XXX */
4862         clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
4863         clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
4864         clear_state_info[2] = 0; /* cik_default_size */
4865         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4866         for (i = 0; i < 3; i++)
4867                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4868         WREG32(RLC_DRIVER_DMA_STATUS, 0);
4869
4870         cik_rlc_start(rdev);
4871
4872         return 0;
4873 }
4874
4875 /*
4876  * Interrupts
4877  * Starting with r6xx, interrupts are handled via a ring buffer.
4878  * Ring buffers are areas of GPU accessible memory that the GPU
4879  * writes interrupt vectors into and the host reads vectors out of.
4880  * There is a rptr (read pointer) that determines where the
4881  * host is currently reading, and a wptr (write pointer)
4882  * which determines where the GPU has written.  When the
4883  * pointers are equal, the ring is idle.  When the GPU
4884  * writes vectors to the ring buffer, it increments the
4885  * wptr.  When there is an interrupt, the host then starts
4886  * fetching commands and processing them until the pointers are
4887  * equal again at which point it updates the rptr.
4888  */
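
/*
 * Illustrative sketch only (not driver code): the host-side service
 * loop implied by the description above.  handle_one_vector() is a
 * hypothetical stand-in for the decode work that the driver's real
 * IRQ processing performs; each IH vector occupies 16 bytes (4 dwords)
 * in the ring.
 */
#if 0
        u32 wptr = RREG32(IH_RB_WPTR);
        u32 rptr = rdev->ih.rptr;

        while (rptr != wptr) {
                handle_one_vector(rdev, rptr);          /* decode 4 dwords */
                rptr = (rptr + 16) & rdev->ih.ptr_mask; /* ring wrap-around */
        }
        WREG32(IH_RB_RPTR, rptr);       /* tell the GPU how far we have read */
        rdev->ih.rptr = rptr;
#endif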
4889
4890 /**
4891  * cik_enable_interrupts - Enable the interrupt ring buffer
4892  *
4893  * @rdev: radeon_device pointer
4894  *
4895  * Enable the interrupt ring buffer (CIK).
4896  */
4897 static void cik_enable_interrupts(struct radeon_device *rdev)
4898 {
4899         u32 ih_cntl = RREG32(IH_CNTL);
4900         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4901
4902         ih_cntl |= ENABLE_INTR;
4903         ih_rb_cntl |= IH_RB_ENABLE;
4904         WREG32(IH_CNTL, ih_cntl);
4905         WREG32(IH_RB_CNTL, ih_rb_cntl);
4906         rdev->ih.enabled = true;
4907 }
4908
4909 /**
4910  * cik_disable_interrupts - Disable the interrupt ring buffer
4911  *
4912  * @rdev: radeon_device pointer
4913  *
4914  * Disable the interrupt ring buffer (CIK).
4915  */
4916 static void cik_disable_interrupts(struct radeon_device *rdev)
4917 {
4918         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4919         u32 ih_cntl = RREG32(IH_CNTL);
4920
4921         ih_rb_cntl &= ~IH_RB_ENABLE;
4922         ih_cntl &= ~ENABLE_INTR;
4923         WREG32(IH_RB_CNTL, ih_rb_cntl);
4924         WREG32(IH_CNTL, ih_cntl);
4925         /* set rptr, wptr to 0 */
4926         WREG32(IH_RB_RPTR, 0);
4927         WREG32(IH_RB_WPTR, 0);
4928         rdev->ih.enabled = false;
4929         rdev->ih.rptr = 0;
4930 }
4931
4932 /**
4933  * cik_disable_interrupt_state - Disable all interrupt sources
4934  *
4935  * @rdev: radeon_device pointer
4936  *
4937  * Clear all interrupt enable bits used by the driver (CIK).
4938  */
4939 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4940 {
4941         u32 tmp;
4942
4943         /* gfx ring */
4944         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4945         /* sdma */
4946         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4947         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4948         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4949         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4950         /* compute queues */
4951         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4952         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4953         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4954         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4955         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4956         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4957         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4958         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4959         /* grbm */
4960         WREG32(GRBM_INT_CNTL, 0);
4961         /* vline/vblank, etc. */
4962         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4963         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4964         if (rdev->num_crtc >= 4) {
4965                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4966                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4967         }
4968         if (rdev->num_crtc >= 6) {
4969                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4970                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4971         }
4972
4973         /* dac hotplug */
4974         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4975
4976         /* digital hotplug */
4977         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4978         WREG32(DC_HPD1_INT_CONTROL, tmp);
4979         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4980         WREG32(DC_HPD2_INT_CONTROL, tmp);
4981         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4982         WREG32(DC_HPD3_INT_CONTROL, tmp);
4983         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984         WREG32(DC_HPD4_INT_CONTROL, tmp);
4985         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4986         WREG32(DC_HPD5_INT_CONTROL, tmp);
4987         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4988         WREG32(DC_HPD6_INT_CONTROL, tmp);
4989
4990 }
4991
4992 /**
4993  * cik_irq_init - init and enable the interrupt ring
4994  *
4995  * @rdev: radeon_device pointer
4996  *
4997  * Allocate a ring buffer for the interrupt controller,
4998  * enable the RLC, disable interrupts, set up the IH
4999  * ring buffer and enable it (CIK).
5000  * Called at device load and resume.
5001  * Returns 0 for success, errors for failure.
5002  */
5003 static __unused int cik_irq_init(struct radeon_device *rdev)
5004 {
5005         int ret = 0;
5006         int rb_bufsz;
5007         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5008
5009         /* allocate ring */
5010         ret = r600_ih_ring_alloc(rdev);
5011         if (ret)
5012                 return ret;
5013
5014         /* disable irqs */
5015         cik_disable_interrupts(rdev);
5016
5017         /* init rlc */
5018         ret = cik_rlc_resume(rdev);
5019         if (ret) {
5020                 r600_ih_ring_fini(rdev);
5021                 return ret;
5022         }
5023
5024         /* setup interrupt control */
5025         /* XXX this should actually be a bus address, not an MC address. same on older asics */
5026         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5027         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5028         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5029          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5030          */
5031         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5032         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5033         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5034         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5035
5036         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5037         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5038
5039         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5040                       IH_WPTR_OVERFLOW_CLEAR |
5041                       (rb_bufsz << 1));
5042
5043         if (rdev->wb.enabled)
5044                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5045
5046         /* set the writeback address whether it's enabled or not */
5047         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5048         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5049
5050         WREG32(IH_RB_CNTL, ih_rb_cntl);
5051
5052         /* set rptr, wptr to 0 */
5053         WREG32(IH_RB_RPTR, 0);
5054         WREG32(IH_RB_WPTR, 0);
5055
5056         /* Default settings for IH_CNTL (disabled at first) */
5057         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5058         /* RPTR_REARM only works if msi's are enabled */
5059         if (rdev->msi_enabled)
5060                 ih_cntl |= RPTR_REARM;
5061         WREG32(IH_CNTL, ih_cntl);
5062
5063         /* force the active interrupt state to all disabled */
5064         cik_disable_interrupt_state(rdev);
5065
5066         pci_enable_busmaster(rdev->dev);
5067
5068         /* enable irqs */
5069         cik_enable_interrupts(rdev);
5070
5071         return ret;
5072 }
5073
5074 /**
5075  * cik_irq_set - enable/disable interrupt sources
5076  *
5077  * @rdev: radeon_device pointer
5078  *
5079  * Enable interrupt sources on the GPU (vblanks, hpd,
5080  * etc.) (CIK).
5081  * Returns 0 for success, errors for failure.
5082  */
5083 int cik_irq_set(struct radeon_device *rdev)
5084 {
5085         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5086                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5087         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5088         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5089         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5090         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5091         u32 grbm_int_cntl = 0;
5092         u32 dma_cntl, dma_cntl1;
5093
5094         if (!rdev->irq.installed) {
5095                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5096                 return -EINVAL;
5097         }
5098         /* don't enable anything if the ih is disabled */
5099         if (!rdev->ih.enabled) {
5100                 cik_disable_interrupts(rdev);
5101                 /* force the active interrupt state to all disabled */
5102                 cik_disable_interrupt_state(rdev);
5103                 return 0;
5104         }
5105
5106         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5107         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5109         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5110         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5111         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5112
5113         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5114         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5115
5116         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5117         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5121         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5122         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124
5125         /* enable CP interrupts on all rings */
5126         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5127                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5128                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5129         }
5130         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5131                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5132                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5133                 if (ring->me == 1) {
5134                         switch (ring->pipe) {
5135                         case 0:
5136                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5137                                 break;
5138                         case 1:
5139                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5140                                 break;
5141                         case 2:
5142                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5143                                 break;
5144                         case 3:
5145                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5146                                 break;
5147                         default:
5148                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5149                                 break;
5150                         }
5151                 } else if (ring->me == 2) {
5152                         switch (ring->pipe) {
5153                         case 0:
5154                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5155                                 break;
5156                         case 1:
5157                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5158                                 break;
5159                         case 2:
5160                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5161                                 break;
5162                         case 3:
5163                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5164                                 break;
5165                         default:
5166                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5167                                 break;
5168                         }
5169                 } else {
5170                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5171                 }
5172         }
5173         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5174                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5175                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5176                 if (ring->me == 1) {
5177                         switch (ring->pipe) {
5178                         case 0:
5179                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5180                                 break;
5181                         case 1:
5182                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5183                                 break;
5184                         case 2:
5185                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5186                                 break;
5187                         case 3:
5188                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5189                                 break;
5190                         default:
5191                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5192                                 break;
5193                         }
5194                 } else if (ring->me == 2) {
5195                         switch (ring->pipe) {
5196                         case 0:
5197                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5198                                 break;
5199                         case 1:
5200                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5201                                 break;
5202                         case 2:
5203                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5204                                 break;
5205                         case 3:
5206                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5207                                 break;
5208                         default:
5209                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5210                                 break;
5211                         }
5212                 } else {
5213                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5214                 }
5215         }
5216
5217         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5218                 DRM_DEBUG("cik_irq_set: sw int dma\n");
5219                 dma_cntl |= TRAP_ENABLE;
5220         }
5221
5222         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5223                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5224                 dma_cntl1 |= TRAP_ENABLE;
5225         }
5226
5227         if (rdev->irq.crtc_vblank_int[0] ||
5228             atomic_read(&rdev->irq.pflip[0])) {
5229                 DRM_DEBUG("cik_irq_set: vblank 0\n");
5230                 crtc1 |= VBLANK_INTERRUPT_MASK;
5231         }
5232         if (rdev->irq.crtc_vblank_int[1] ||
5233             atomic_read(&rdev->irq.pflip[1])) {
5234                 DRM_DEBUG("cik_irq_set: vblank 1\n");
5235                 crtc2 |= VBLANK_INTERRUPT_MASK;
5236         }
5237         if (rdev->irq.crtc_vblank_int[2] ||
5238             atomic_read(&rdev->irq.pflip[2])) {
5239                 DRM_DEBUG("cik_irq_set: vblank 2\n");
5240                 crtc3 |= VBLANK_INTERRUPT_MASK;
5241         }
5242         if (rdev->irq.crtc_vblank_int[3] ||
5243             atomic_read(&rdev->irq.pflip[3])) {
5244                 DRM_DEBUG("cik_irq_set: vblank 3\n");
5245                 crtc4 |= VBLANK_INTERRUPT_MASK;
5246         }
5247         if (rdev->irq.crtc_vblank_int[4] ||
5248             atomic_read(&rdev->irq.pflip[4])) {
5249                 DRM_DEBUG("cik_irq_set: vblank 4\n");
5250                 crtc5 |= VBLANK_INTERRUPT_MASK;
5251         }
5252         if (rdev->irq.crtc_vblank_int[5] ||
5253             atomic_read(&rdev->irq.pflip[5])) {
5254                 DRM_DEBUG("cik_irq_set: vblank 5\n");
5255                 crtc6 |= VBLANK_INTERRUPT_MASK;
5256         }
5257         if (rdev->irq.hpd[0]) {
5258                 DRM_DEBUG("cik_irq_set: hpd 1\n");
5259                 hpd1 |= DC_HPDx_INT_EN;
5260         }
5261         if (rdev->irq.hpd[1]) {
5262                 DRM_DEBUG("cik_irq_set: hpd 2\n");
5263                 hpd2 |= DC_HPDx_INT_EN;
5264         }
5265         if (rdev->irq.hpd[2]) {
5266                 DRM_DEBUG("cik_irq_set: hpd 3\n");
5267                 hpd3 |= DC_HPDx_INT_EN;
5268         }
5269         if (rdev->irq.hpd[3]) {
5270                 DRM_DEBUG("cik_irq_set: hpd 4\n");
5271                 hpd4 |= DC_HPDx_INT_EN;
5272         }
5273         if (rdev->irq.hpd[4]) {
5274                 DRM_DEBUG("cik_irq_set: hpd 5\n");
5275                 hpd5 |= DC_HPDx_INT_EN;
5276         }
5277         if (rdev->irq.hpd[5]) {
5278                 DRM_DEBUG("cik_irq_set: hpd 6\n");
5279                 hpd6 |= DC_HPDx_INT_EN;
5280         }
5281
5282         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5283
5284         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5285         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5286
5287         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5288         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5289         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5290         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5291         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5292         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5293         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5294         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5295
5296         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5297
5298         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5299         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5300         if (rdev->num_crtc >= 4) {
5301                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5302                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5303         }
5304         if (rdev->num_crtc >= 6) {
5305                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5306                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5307         }
5308
5309         WREG32(DC_HPD1_INT_CONTROL, hpd1);
5310         WREG32(DC_HPD2_INT_CONTROL, hpd2);
5311         WREG32(DC_HPD3_INT_CONTROL, hpd3);
5312         WREG32(DC_HPD4_INT_CONTROL, hpd4);
5313         WREG32(DC_HPD5_INT_CONTROL, hpd5);
5314         WREG32(DC_HPD6_INT_CONTROL, hpd6);
5315
5316         return 0;
5317 }
5318
5319 /**
5320  * cik_irq_ack - ack interrupt sources
5321  *
5322  * @rdev: radeon_device pointer
5323  *
5324  * Ack interrupt sources on the GPU (vblanks, hpd,
5325  * etc.) (CIK).  Certain interrupt sources are sw
5326  * generated and do not require an explicit ack.
5327  */
5328 static inline void cik_irq_ack(struct radeon_device *rdev)
5329 {
5330         u32 tmp;
5331
5332         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5333         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5334         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5335         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5336         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5337         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5338         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5339
5340         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5341                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5342         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5343                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5344         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5345                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5346         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5347                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5348
5349         if (rdev->num_crtc >= 4) {
5350                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5351                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5352                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5353                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5354                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5355                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5356                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5357                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5358         }
5359
5360         if (rdev->num_crtc >= 6) {
5361                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5362                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5363                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5364                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5365                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5366                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5367                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5368                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5369         }
5370
5371         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5372                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5373                 tmp |= DC_HPDx_INT_ACK;
5374                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5375         }
5376         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5377                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5378                 tmp |= DC_HPDx_INT_ACK;
5379                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5380         }
5381         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5382                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5383                 tmp |= DC_HPDx_INT_ACK;
5384                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5385         }
5386         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5387                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5388                 tmp |= DC_HPDx_INT_ACK;
5389                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5390         }
5391         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5392                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5393                 tmp |= DC_HPDx_INT_ACK;
5394                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5395         }
5396         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5397                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5398                 tmp |= DC_HPDx_INT_ACK;
5399                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5400         }
5401 }
5402
5403 /**
5404  * cik_irq_disable - disable interrupts
5405  *
5406  * @rdev: radeon_device pointer
5407  *
5408  * Disable interrupts on the hw (CIK).
5409  */
5410 static void cik_irq_disable(struct radeon_device *rdev)
5411 {
5412         cik_disable_interrupts(rdev);
5413         /* Wait and acknowledge irq */
5414         DRM_MDELAY(1);
5415         cik_irq_ack(rdev);
5416         cik_disable_interrupt_state(rdev);
5417 }
5418
5419 /**
5420  * cik_irq_suspend - disable interrupts for suspend
5421  *
5422  * @rdev: radeon_device pointer
5423  *
5424  * Disable interrupts and stop the RLC (CIK).
5425  * Used for suspend.
5426  */
5427 static void cik_irq_suspend(struct radeon_device *rdev)
5428 {
5429         cik_irq_disable(rdev);
5430         cik_rlc_stop(rdev);
5431 }
5432
5433 /**
5434  * cik_irq_fini - tear down interrupt support
5435  *
5436  * @rdev: radeon_device pointer
5437  *
5438  * Disable interrupts on the hw and free the IH ring
5439  * buffer (CIK).
5440  * Used for driver unload.
5441  */
5442 static __unused void cik_irq_fini(struct radeon_device *rdev)
5443 {
5444         cik_irq_suspend(rdev);
5445         r600_ih_ring_fini(rdev);
5446 }
5447
5448 /**
5449  * cik_get_ih_wptr - get the IH ring buffer wptr
5450  *
5451  * @rdev: radeon_device pointer
5452  *
5453  * Get the IH ring buffer wptr from either the register
5454  * or the writeback memory buffer (CIK).  Also check for
5455  * ring buffer overflow and deal with it.
5456  * Used by cik_irq_process().
5457  * Returns the value of the wptr.
5458  */
5459 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5460 {
5461         u32 wptr, tmp;
5462
5463         if (rdev->wb.enabled)
5464                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5465         else
5466                 wptr = RREG32(IH_RB_WPTR);
5467
5468         if (wptr & RB_OVERFLOW) {
5469                 /* When a ring buffer overflow happens, start parsing interrupts
5470                  * from the last vector that was not overwritten (wptr + 16).
5471                  * Hopefully this should allow us to catch up.
5472                  */
5473                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5474                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5475                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5476                 tmp = RREG32(IH_RB_CNTL);
5477                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5478                 WREG32(IH_RB_CNTL, tmp);
5479         }
5480         return (wptr & rdev->ih.ptr_mask);
5481 }
5482
5483 /* CIK IV Ring
5484  * Each IV ring entry is 128 bits:
5485  * [7:0]    - interrupt source id
5486  * [31:8]   - reserved
5487  * [59:32]  - interrupt source data
5488  * [63:60]  - reserved
5489  * [71:64]  - RINGID
5490  *            CP:
5491  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5492  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5493  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5494  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5495  *            PIPE_ID - ME0 0=3D
5496  *                    - ME1&2 compute dispatcher (4 pipes each)
5497  *            SDMA:
5498  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5499  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5500  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5501  * [79:72]  - VMID
5502  * [95:80]  - PASID
5503  * [127:96] - reserved
5504  */
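/*
 * Illustrative sketch only: extracting the fields of one IV entry from the
 * four little-endian dwords it occupies, per the bit layout above.
 * cik_irq_process() below open-codes the same extraction for src_id,
 * src_data and ring_id; vm_id and pasid are shown for completeness.
 *
 *      src_id   = le32_to_cpu(dw[0]) & 0xff;           // [7:0]
 *      src_data = le32_to_cpu(dw[1]) & 0xfffffff;      // [59:32]
 *      ring_id  = le32_to_cpu(dw[2]) & 0xff;           // [71:64]
 *      vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff;    // [79:72]
 *      pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff; // [95:80]
 */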
5505 /**
5506  * cik_irq_process - interrupt handler
5507  *
5508  * @rdev: radeon_device pointer
5509  *
5510  * Interrupt handler (CIK).  Walk the IH ring,
5511  * ack interrupts and schedule work to handle
5512  * interrupt events.
5513  * Returns irq process return code.
5514  */
5515 irqreturn_t cik_irq_process(struct radeon_device *rdev)
5516 {
5517         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5518         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5519         u32 wptr;
5520         u32 rptr;
5521         u32 src_id, src_data, ring_id;
5522         u8 me_id, pipe_id, queue_id;
5523         u32 ring_index;
5524         bool queue_hotplug = false;
5525         bool queue_reset = false;
5526         u32 addr, status, mc_client;
5527
5528         if (!rdev->ih.enabled || rdev->shutdown)
5529                 return IRQ_NONE;
5530
5531         wptr = cik_get_ih_wptr(rdev);
5532
5533 restart_ih:
5534         /* is somebody else already processing irqs? */
5535         if (atomic_xchg(&rdev->ih.lock, 1))
5536                 return IRQ_NONE;
5537
5538         rptr = rdev->ih.rptr;
5539         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5540
5541         /* Order reading of wptr vs. reading of IH ring data */
5542         cpu_lfence();
5543
5544         /* display interrupts */
5545         cik_irq_ack(rdev);
5546
5547         while (rptr != wptr) {
5548                 /* wptr/rptr are in bytes! */
5549                 ring_index = rptr / 4;
5550                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5551                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5552                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5553
5554                 switch (src_id) {
5555                 case 1: /* D1 vblank/vline */
5556                         switch (src_data) {
5557                         case 0: /* D1 vblank */
5558                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5559                                         if (rdev->irq.crtc_vblank_int[0]) {
5560                                                 drm_handle_vblank(rdev->ddev, 0);
5561                                                 rdev->pm.vblank_sync = true;
5562                                                 wake_up(&rdev->irq.vblank_queue);
5563                                         }
5564                                         if (atomic_read(&rdev->irq.pflip[0]))
5565                                                 radeon_crtc_handle_flip(rdev, 0);
5566                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5567                                         DRM_DEBUG("IH: D1 vblank\n");
5568                                 }
5569                                 break;
5570                         case 1: /* D1 vline */
5571                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5572                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5573                                         DRM_DEBUG("IH: D1 vline\n");
5574                                 }
5575                                 break;
5576                         default:
5577                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5578                                 break;
5579                         }
5580                         break;
5581                 case 2: /* D2 vblank/vline */
5582                         switch (src_data) {
5583                         case 0: /* D2 vblank */
5584                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5585                                         if (rdev->irq.crtc_vblank_int[1]) {
5586                                                 drm_handle_vblank(rdev->ddev, 1);
5587                                                 rdev->pm.vblank_sync = true;
5588                                                 wake_up(&rdev->irq.vblank_queue);
5589                                         }
5590                                         if (atomic_read(&rdev->irq.pflip[1]))
5591                                                 radeon_crtc_handle_flip(rdev, 1);
5592                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5593                                         DRM_DEBUG("IH: D2 vblank\n");
5594                                 }
5595                                 break;
5596                         case 1: /* D2 vline */
5597                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5598                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5599                                         DRM_DEBUG("IH: D2 vline\n");
5600                                 }
5601                                 break;
5602                         default:
5603                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5604                                 break;
5605                         }
5606                         break;
5607                 case 3: /* D3 vblank/vline */
5608                         switch (src_data) {
5609                         case 0: /* D3 vblank */
5610                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5611                                         if (rdev->irq.crtc_vblank_int[2]) {
5612                                                 drm_handle_vblank(rdev->ddev, 2);
5613                                                 rdev->pm.vblank_sync = true;
5614                                                 wake_up(&rdev->irq.vblank_queue);
5615                                         }
5616                                         if (atomic_read(&rdev->irq.pflip[2]))
5617                                                 radeon_crtc_handle_flip(rdev, 2);
5618                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5619                                         DRM_DEBUG("IH: D3 vblank\n");
5620                                 }
5621                                 break;
5622                         case 1: /* D3 vline */
5623                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5624                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5625                                         DRM_DEBUG("IH: D3 vline\n");
5626                                 }
5627                                 break;
5628                         default:
5629                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5630                                 break;
5631                         }
5632                         break;
5633                 case 4: /* D4 vblank/vline */
5634                         switch (src_data) {
5635                         case 0: /* D4 vblank */
5636                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5637                                         if (rdev->irq.crtc_vblank_int[3]) {
5638                                                 drm_handle_vblank(rdev->ddev, 3);
5639                                                 rdev->pm.vblank_sync = true;
5640                                                 wake_up(&rdev->irq.vblank_queue);
5641                                         }
5642                                         if (atomic_read(&rdev->irq.pflip[3]))
5643                                                 radeon_crtc_handle_flip(rdev, 3);
5644                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5645                                         DRM_DEBUG("IH: D4 vblank\n");
5646                                 }
5647                                 break;
5648                         case 1: /* D4 vline */
5649                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5650                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5651                                         DRM_DEBUG("IH: D4 vline\n");
5652                                 }
5653                                 break;
5654                         default:
5655                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5656                                 break;
5657                         }
5658                         break;
5659                 case 5: /* D5 vblank/vline */
5660                         switch (src_data) {
5661                         case 0: /* D5 vblank */
5662                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5663                                         if (rdev->irq.crtc_vblank_int[4]) {
5664                                                 drm_handle_vblank(rdev->ddev, 4);
5665                                                 rdev->pm.vblank_sync = true;
5666                                                 wake_up(&rdev->irq.vblank_queue);
5667                                         }
5668                                         if (atomic_read(&rdev->irq.pflip[4]))
5669                                                 radeon_crtc_handle_flip(rdev, 4);
5670                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5671                                         DRM_DEBUG("IH: D5 vblank\n");
5672                                 }
5673                                 break;
5674                         case 1: /* D5 vline */
5675                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5676                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5677                                         DRM_DEBUG("IH: D5 vline\n");
5678                                 }
5679                                 break;
5680                         default:
5681                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5682                                 break;
5683                         }
5684                         break;
5685                 case 6: /* D6 vblank/vline */
5686                         switch (src_data) {
5687                         case 0: /* D6 vblank */
5688                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5689                                         if (rdev->irq.crtc_vblank_int[5]) {
5690                                                 drm_handle_vblank(rdev->ddev, 5);
5691                                                 rdev->pm.vblank_sync = true;
5692                                                 wake_up(&rdev->irq.vblank_queue);
5693                                         }
5694                                         if (atomic_read(&rdev->irq.pflip[5]))
5695                                                 radeon_crtc_handle_flip(rdev, 5);
5696                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5697                                         DRM_DEBUG("IH: D6 vblank\n");
5698                                 }
5699                                 break;
5700                         case 1: /* D6 vline */
5701                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5702                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5703                                         DRM_DEBUG("IH: D6 vline\n");
5704                                 }
5705                                 break;
5706                         default:
5707                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5708                                 break;
5709                         }
5710                         break;
5711                 case 42: /* HPD hotplug */
5712                         switch (src_data) {
5713                         case 0:
5714                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5715                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5716                                         queue_hotplug = true;
5717                                         DRM_DEBUG("IH: HPD1\n");
5718                                 }
5719                                 break;
5720                         case 1:
5721                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5722                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5723                                         queue_hotplug = true;
5724                                         DRM_DEBUG("IH: HPD2\n");
5725                                 }
5726                                 break;
5727                         case 2:
5728                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5729                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5730                                         queue_hotplug = true;
5731                                         DRM_DEBUG("IH: HPD3\n");
5732                                 }
5733                                 break;
5734                         case 3:
5735                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5736                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5737                                         queue_hotplug = true;
5738                                         DRM_DEBUG("IH: HPD4\n");
5739                                 }
5740                                 break;
5741                         case 4:
5742                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5743                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5744                                         queue_hotplug = true;
5745                                         DRM_DEBUG("IH: HPD5\n");
5746                                 }
5747                                 break;
5748                         case 5:
5749                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5750                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5751                                         queue_hotplug = true;
5752                                         DRM_DEBUG("IH: HPD6\n");
5753                                 }
5754                                 break;
5755                         default:
5756                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5757                                 break;
5758                         }
5759                         break;
5760                 case 146:
5761                 case 147:
5762                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5763                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5764                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5765                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5766                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5767                                 addr);
5768                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5769                                 status);
5770                         cik_vm_decode_fault(rdev, status, addr, mc_client);
5771                         /* reset addr and status */
5772                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5773                         break;
5774                 case 176: /* GFX RB CP_INT */
5775                 case 177: /* GFX IB CP_INT */
5776                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5777                         break;
5778                 case 181: /* CP EOP event */
5779                         DRM_DEBUG("IH: CP EOP\n");
5780                         /* XXX check the bitfield order! */
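                        /* Editorial note: the decode below assumes the CP
                         * RINGID layout from the IV ring comment above
                         * (ME_ID in bits [6:5], PIPE_ID in bits [4:3],
                         * QUEUE_ID in bits [2:0]), which the XXX still
                         * flags as unverified.  The privileged reg/inst
                         * cases below reuse the same decode.
                         */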
5781                         me_id = (ring_id & 0x60) >> 5;
5782                         pipe_id = (ring_id & 0x18) >> 3;
5783                         queue_id = (ring_id & 0x7) >> 0;
5784                         switch (me_id) {
5785                         case 0:
5786                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5787                                 break;
5788                         case 1:
5789                         case 2:
5790                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5791                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5792                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5793                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5794                                 break;
5795                         }
5796                         break;
5797                 case 184: /* CP Privileged reg access */
5798                         DRM_ERROR("Illegal register access in command stream\n");
5799                         /* XXX check the bitfield order! */
5800                         me_id = (ring_id & 0x60) >> 5;
5801                         pipe_id = (ring_id & 0x18) >> 3;
5802                         queue_id = (ring_id & 0x7) >> 0;
5803                         switch (me_id) {
5804                         case 0:
5805                                 /* This results in a full GPU reset, but all we need to do is soft
5806                                  * reset the CP for gfx
5807                                  */
5808                                 queue_reset = true;
5809                                 break;
5810                         case 1:
5811                                 /* XXX compute */
5812                                 queue_reset = true;
5813                                 break;
5814                         case 2:
5815                                 /* XXX compute */
5816                                 queue_reset = true;
5817                                 break;
5818                         }
5819                         break;
5820                 case 185: /* CP Privileged inst */
5821                         DRM_ERROR("Illegal instruction in command stream\n");
5822                         /* XXX check the bitfield order! */
5823                         me_id = (ring_id & 0x60) >> 5;
5824                         pipe_id = (ring_id & 0x18) >> 3;
5825                         queue_id = (ring_id & 0x7) >> 0;
5826                         switch (me_id) {
5827                         case 0:
5828                                 /* This results in a full GPU reset, but all we need to do is soft
5829                                  * reset the CP for gfx
5830                                  */
5831                                 queue_reset = true;
5832                                 break;
5833                         case 1:
5834                                 /* XXX compute */
5835                                 queue_reset = true;
5836                                 break;
5837                         case 2:
5838                                 /* XXX compute */
5839                                 queue_reset = true;
5840                                 break;
5841                         }
5842                         break;
5843                 case 224: /* SDMA trap event */
5844                         /* XXX check the bitfield order! */
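                        /* Editorial note: per the SDMA RINGID layout in the
                         * IV ring comment above, INSTANCE_ID sits in bits
                         * [1:0] (reused here as me_id: 0 = sdma0, 1 = sdma1)
                         * and QUEUE_ID in bits [3:2] (0 = gfx, 1 = rlc0,
                         * 2 = rlc1); the XXX above flags the order as
                         * unverified.
                         */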
5845                         me_id = (ring_id & 0x3) >> 0;
5846                         queue_id = (ring_id & 0xc) >> 2;
5847                         DRM_DEBUG("IH: SDMA trap\n");
5848                         switch (me_id) {
5849                         case 0:
5850                                 switch (queue_id) {
5851                                 case 0:
5852                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5853                                         break;
5854                                 case 1:
5855                                         /* XXX compute */
5856                                         break;
5857                                 case 2:
5858                                         /* XXX compute */
5859                                         break;
5860                                 }
5861                                 break;
5862                         case 1:
5863                                 switch (queue_id) {
5864                                 case 0:
5865                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5866                                         break;
5867                                 case 1:
5868                                         /* XXX compute */
5869                                         break;
5870                                 case 2:
5871                                         /* XXX compute */
5872                                         break;
5873                                 }
5874                                 break;
5875                         }
5876                         break;
5877                 case 241: /* SDMA Privileged inst */
5878                 case 247: /* SDMA Privileged inst */
5879                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
5880                         /* XXX check the bitfield order! */
5881                         me_id = (ring_id & 0x3) >> 0;
5882                         queue_id = (ring_id & 0xc) >> 2;
5883                         switch (me_id) {
5884                         case 0:
5885                                 switch (queue_id) {
5886                                 case 0:
5887                                         queue_reset = true;
5888                                         break;
5889                                 case 1:
5890                                         /* XXX compute */
5891                                         queue_reset = true;
5892                                         break;
5893                                 case 2:
5894                                         /* XXX compute */
5895                                         queue_reset = true;
5896                                         break;
5897                                 }
5898                                 break;
5899                         case 1:
5900                                 switch (queue_id) {
5901                                 case 0:
5902                                         queue_reset = true;
5903                                         break;
5904                                 case 1:
5905                                         /* XXX compute */
5906                                         queue_reset = true;
5907                                         break;
5908                                 case 2:
5909                                         /* XXX compute */
5910                                         queue_reset = true;
5911                                         break;
5912                                 }
5913                                 break;
5914                         }
5915                         break;
5916                 case 233: /* GUI IDLE */
5917                         DRM_DEBUG("IH: GUI idle\n");
5918                         break;
5919                 default:
5920                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5921                         break;
5922                 }
5923
5924                 /* wptr/rptr are in bytes! */
5925                 rptr += 16;
5926                 rptr &= rdev->ih.ptr_mask;
5927         }
5928         if (queue_hotplug)
5929                 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
5930         if (queue_reset)
5931                 taskqueue_enqueue(rdev->tq, &rdev->reset_work);
5932         rdev->ih.rptr = rptr;
5933         WREG32(IH_RB_RPTR, rdev->ih.rptr);
5934         atomic_set(&rdev->ih.lock, 0);
5935
5936         /* make sure wptr hasn't changed while processing */
5937         wptr = cik_get_ih_wptr(rdev);
5938         if (wptr != rptr)
5939                 goto restart_ih;
5940
5941         return IRQ_HANDLED;
5942 }
5943
5944 /*
5945  * startup/shutdown callbacks
5946  */
5947 /**
5948  * cik_startup - program the asic to a functional state
5949  *
5950  * @rdev: radeon_device pointer
5951  *
5952  * Programs the asic to a functional state (CIK).
5953  * Called by cik_init() and cik_resume().
5954  * Returns 0 for success, error for failure.
5955  */
5956 static int cik_startup(struct radeon_device *rdev)
5957 {
5958         struct radeon_ring *ring;
5959         int r;
5960
5961         cik_mc_program(rdev);
5962
5963         if (rdev->flags & RADEON_IS_IGP) {
5964                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5965                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5966                         r = cik_init_microcode(rdev);
5967                         if (r) {
5968                                 DRM_ERROR("Failed to load firmware!\n");
5969                                 return r;
5970                         }
5971                 }
5972         } else {
5973                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5974                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5975                     !rdev->mc_fw) {
5976                         r = cik_init_microcode(rdev);
5977                         if (r) {
5978                                 DRM_ERROR("Failed to load firmware!\n");
5979                                 return r;
5980                         }
5981                 }
5982
5983                 r = ci_mc_load_microcode(rdev);
5984                 if (r) {
5985                         DRM_ERROR("Failed to load MC firmware!\n");
5986                         return r;
5987                 }
5988         }
5989
5990         r = r600_vram_scratch_init(rdev);
5991         if (r)
5992                 return r;
5993
5994         r = cik_pcie_gart_enable(rdev);
5995         if (r)
5996                 return r;
5997         cik_gpu_init(rdev);
5998
5999         /* allocate rlc buffers */
6000         r = si_rlc_init(rdev);
6001         if (r) {
6002                 DRM_ERROR("Failed to init rlc BOs!\n");
6003                 return r;
6004         }
6005
6006         /* allocate wb buffer */
6007         r = radeon_wb_init(rdev);
6008         if (r)
6009                 return r;
6010
6011         /* allocate mec buffers */
6012         r = cik_mec_init(rdev);
6013         if (r) {
6014                 DRM_ERROR("Failed to init MEC BOs!\n");
6015                 return r;
6016         }
6017
6018         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6019         if (r) {
6020                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6021                 return r;
6022         }
6023
6024         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6025         if (r) {
6026                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6027                 return r;
6028         }
6029
6030         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6031         if (r) {
6032                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6033                 return r;
6034         }
6035
6036         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6037         if (r) {
6038                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6039                 return r;
6040         }
6041
6042         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6043         if (r) {
6044                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6045                 return r;
6046         }
6047
6048         r = cik_uvd_resume(rdev);
6049         if (!r) {
6050                 r = radeon_fence_driver_start_ring(rdev,
6051                                                    R600_RING_TYPE_UVD_INDEX);
6052                 if (r)
6053                         dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6054         }
6055         if (r)
6056                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6057
6058         /* Enable IRQ */
6059         if (!rdev->irq.installed) {
6060                 r = radeon_irq_kms_init(rdev);
6061                 if (r)
6062                         return r;
6063         }
6064
6065         r = cik_irq_init(rdev);
6066         if (r) {
6067                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6068                 radeon_irq_kms_fini(rdev);
6069                 return r;
6070         }
6071         cik_irq_set(rdev);
6072
6073         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6074         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6075                              CP_RB0_RPTR, CP_RB0_WPTR,
6076                              0, 0xfffff, RADEON_CP_PACKET2);
6077         if (r)
6078                 return r;
6079
6080         /* set up the compute queues */
6081         /* type-2 packets are deprecated on MEC, use type-3 instead */
6082         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6083         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6084                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6085                              0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6086         if (r)
6087                 return r;
6088         ring->me = 1; /* first MEC */
6089         ring->pipe = 0; /* first pipe */
6090         ring->queue = 0; /* first queue */
6091         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6092
6093         /* type-2 packets are deprecated on MEC, use type-3 instead */
6094         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6095         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6096                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6097                              0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6098         if (r)
6099                 return r;
6100         /* dGPUs only have 1 MEC */
6101         ring->me = 1; /* first MEC */
6102         ring->pipe = 0; /* first pipe */
6103         ring->queue = 1; /* second queue */
6104         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6105
6106         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6107         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6108                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6109                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6110                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6111         if (r)
6112                 return r;
6113
6114         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6115         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6116                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6117                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6118                              2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6119         if (r)
6120                 return r;
6121
6122         r = cik_cp_resume(rdev);
6123         if (r)
6124                 return r;
6125
6126         r = cik_sdma_resume(rdev);
6127         if (r)
6128                 return r;
6129
6130         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6131         if (ring->ring_size) {
6132                 r = radeon_ring_init(rdev, ring, ring->ring_size,
6133                                      R600_WB_UVD_RPTR_OFFSET,
6134                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6135                                      0, 0xfffff, RADEON_CP_PACKET2);
6136                 if (!r)
6137                         r = r600_uvd_init(rdev);
6138                 if (r)
6139                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6140         }
6141
6142         r = radeon_ib_pool_init(rdev);
6143         if (r) {
6144                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6145                 return r;
6146         }
6147
6148         r = radeon_vm_manager_init(rdev);
6149         if (r) {
6150                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6151                 return r;
6152         }
6153
6154         return 0;
6155 }
6156
6157 /**
6158  * cik_resume - resume the asic to a functional state
6159  *
6160  * @rdev: radeon_device pointer
6161  *
6162  * Programs the asic to a functional state (CIK).
6163  * Called at resume.
6164  * Returns 0 for success, error for failure.
6165  */
6166 int cik_resume(struct radeon_device *rdev)
6167 {
6168         int r;
6169
6170         /* post card */
6171         atom_asic_init(rdev->mode_info.atom_context);
6172
6173         /* init golden registers */
6174         cik_init_golden_registers(rdev);
6175
6176         rdev->accel_working = true;
6177         r = cik_startup(rdev);
6178         if (r) {
6179                 DRM_ERROR("cik startup failed on resume\n");
6180                 rdev->accel_working = false;
6181                 return r;
6182         }
6183
6184         return r;
6186 }
6187
6188 /**
6189  * cik_suspend - suspend the asic
6190  *
6191  * @rdev: radeon_device pointer
6192  *
6193  * Bring the chip into a state suitable for suspend (CIK).
6194  * Called at suspend.
6195  * Returns 0 for success.
6196  */
6197 int cik_suspend(struct radeon_device *rdev)
6198 {
6199         radeon_vm_manager_fini(rdev);
6200         cik_cp_enable(rdev, false);
6201         cik_sdma_enable(rdev, false);
6202         r600_uvd_stop(rdev);
6203         radeon_uvd_suspend(rdev);
6204         cik_irq_suspend(rdev);
6205         radeon_wb_disable(rdev);
6206         cik_pcie_gart_disable(rdev);
6207         return 0;
6208 }
6209
6210 /* The plan is to move initialization into this function and use
6211  * helper functions so that radeon_device_init does little more
6212  * than call asic specific functions. This should also allow
6213  * us to remove a bunch of callback functions
6214  * like vram_info.
6215  */
6216 /**
6217  * cik_init - asic specific driver and hw init
6218  *
6219  * @rdev: radeon_device pointer
6220  *
6221  * Setup asic specific driver variables and program the hw
6222  * to a functional state (CIK).
6223  * Called at driver startup.
6224  * Returns 0 for success, errors for failure.
6225  */
6226 int cik_init(struct radeon_device *rdev)
6227 {
6228         struct radeon_ring *ring;
6229         int r;
6230
6231         /* Read BIOS */
6232         if (!radeon_get_bios(rdev)) {
6233                 if (ASIC_IS_AVIVO(rdev))
6234                         return -EINVAL;
6235         }
6236         /* Must be an ATOMBIOS */
6237         if (!rdev->is_atom_bios) {
6238                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6239                 return -EINVAL;
6240         }
6241         r = radeon_atombios_init(rdev);
6242         if (r)
6243                 return r;
6244
6245         /* Post card if necessary */
6246         if (!radeon_card_posted(rdev)) {
6247                 if (!rdev->bios) {
6248                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6249                         return -EINVAL;
6250                 }
6251                 DRM_INFO("GPU not posted. Posting now...\n");
6252                 atom_asic_init(rdev->mode_info.atom_context);
6253         }
6254         /* init golden registers */
6255         cik_init_golden_registers(rdev);
6256         /* Initialize scratch registers */
6257         cik_scratch_init(rdev);
6258         /* Initialize surface registers */
6259         radeon_surface_init(rdev);
6260         /* Initialize clocks */
6261         radeon_get_clock_info(rdev->ddev);
6262
6263         /* Fence driver */
6264         r = radeon_fence_driver_init(rdev);
6265         if (r)
6266                 return r;
6267
6268         /* initialize memory controller */
6269         r = cik_mc_init(rdev);
6270         if (r)
6271                 return r;
6272         /* Memory manager */
6273         r = radeon_bo_init(rdev);
6274         if (r)
6275                 return r;
6276
6277         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6278         ring->ring_obj = NULL;
6279         r600_ring_init(rdev, ring, 1024 * 1024);
6280
6281         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6282         ring->ring_obj = NULL;
6283         r600_ring_init(rdev, ring, 1024 * 1024);
6284         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6285         if (r)
6286                 return r;
6287
6288         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6289         ring->ring_obj = NULL;
6290         r600_ring_init(rdev, ring, 1024 * 1024);
6291         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6292         if (r)
6293                 return r;
6294
6295         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6296         ring->ring_obj = NULL;
6297         r600_ring_init(rdev, ring, 256 * 1024);
6298
6299         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6300         ring->ring_obj = NULL;
6301         r600_ring_init(rdev, ring, 256 * 1024);
6302
6303         r = radeon_uvd_init(rdev);
6304         if (!r) {
6305                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6306                 ring->ring_obj = NULL;
6307                 r600_ring_init(rdev, ring, 4096);
6308         }
6309
6310         rdev->ih.ring_obj = NULL;
6311         r600_ih_ring_init(rdev, 64 * 1024);
6312
6313         r = r600_pcie_gart_init(rdev);
6314         if (r)
6315                 return r;
6316
6317         rdev->accel_working = true;
6318         r = cik_startup(rdev);
6319         if (r) {
6320                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6321                 cik_cp_fini(rdev);
6322                 cik_sdma_fini(rdev);
6323                 cik_irq_fini(rdev);
6324                 si_rlc_fini(rdev);
6325                 cik_mec_fini(rdev);
6326                 radeon_wb_fini(rdev);
6327                 radeon_ib_pool_fini(rdev);
6328                 radeon_vm_manager_fini(rdev);
6329                 radeon_irq_kms_fini(rdev);
6330                 cik_pcie_gart_fini(rdev);
6331                 rdev->accel_working = false;
6332         }
6333
6334         /* Don't start up if the MC ucode is missing.
6335          * The default clocks and voltages before the MC ucode
6336          * is loaded are not sufficient for advanced operations.
6337          */
6338         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6339                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6340                 return -EINVAL;
6341         }
6342
6343         return 0;
6344 }
6345
6346 /**
6347  * cik_fini - asic specific driver and hw fini
6348  *
6349  * @rdev: radeon_device pointer
6350  *
6351  * Tear down the asic specific driver variables and program the hw
6352  * to an idle state (CIK).
6353  * Called at driver unload.
6354  */
6355 void cik_fini(struct radeon_device *rdev)
6356 {
6357         cik_cp_fini(rdev);
6358         cik_sdma_fini(rdev);
6359         cik_irq_fini(rdev);
6360         si_rlc_fini(rdev);
6361         cik_mec_fini(rdev);
6362         radeon_wb_fini(rdev);
6363         radeon_vm_manager_fini(rdev);
6364         radeon_ib_pool_fini(rdev);
6365         radeon_irq_kms_fini(rdev);
6366         r600_uvd_stop(rdev);
6367         radeon_uvd_fini(rdev);
6368         cik_pcie_gart_fini(rdev);
6369         r600_vram_scratch_fini(rdev);
6370         radeon_gem_fini(rdev);
6371         radeon_fence_driver_fini(rdev);
6372         radeon_bo_fini(rdev);
6373         radeon_atombios_fini(rdev);
6374         kfree(rdev->bios);
6375         rdev->bios = NULL;
6376 }
6377
6378 /* display watermark setup */
6379 /**
6380  * dce8_line_buffer_adjust - Set up the line buffer
6381  *
6382  * @rdev: radeon_device pointer
6383  * @radeon_crtc: the selected display controller
6384  * @mode: the current display mode on the selected display
6385  * controller
6386  *
6387  * Set up the line buffer allocation for
6388  * the selected display controller (CIK).
6389  * Returns the line buffer size in pixels.
6390  */
6391 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6392                                    struct radeon_crtc *radeon_crtc,
6393                                    struct drm_display_mode *mode)
6394 {
6395         u32 tmp;
6396
6397         /*
6398          * Line Buffer Setup
6399          * There are 6 line buffers, one for each display controller.
6400          * There are 3 partitions per LB. Select the number of partitions
6401          * to enable based on the display width.  For display widths larger
6402          * than 4096, you need to use 2 display controllers and combine
6403          * them using the stereo blender.
6404          */
6405         if (radeon_crtc->base.enabled && mode) {
6406                 if (mode->crtc_hdisplay < 1920)
6407                         tmp = 1;
6408                 else if (mode->crtc_hdisplay < 2560)
6409                         tmp = 2;
6410                 else if (mode->crtc_hdisplay < 4096)
6411                         tmp = 0;
6412                 else {
6413                         DRM_DEBUG_KMS("Mode too big for LB!\n");
6414                         tmp = 0;
6415                 }
6416         } else
6417                 tmp = 1;
6418
6419         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6420                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6421
6422         if (radeon_crtc->base.enabled && mode) {
6423                 switch (tmp) {
6424                 case 0:
6425                 default:
6426                         return 4096 * 2;
6427                 case 1:
6428                         return 1920 * 2;
6429                 case 2:
6430                         return 2560 * 2;
6431                 }
6432         }
6433
6434         /* controller not enabled, so no lb used */
6435         return 0;
6436 }
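
/*
 * Worked example for dce8_line_buffer_adjust (hypothetical mode): a
 * 1920 pixel wide display is not < 1920, so it falls in the < 2560
 * bucket, tmp = 2, and the function returns 2560 * 2 = 5120 pixels of
 * line buffer for this controller.
 */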
6437
6438 /**
6439  * cik_get_number_of_dram_channels - get the number of dram channels
6440  *
6441  * @rdev: radeon_device pointer
6442  *
6443  * Look up the number of video ram channels (CIK).
6444  * Used for display watermark bandwidth calculations
6445  * Returns the number of dram channels
6446  */
6447 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6448 {
6449         u32 tmp = RREG32(MC_SHARED_CHMAP);
6450
6451         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6452         case 0:
6453         default:
6454                 return 1;
6455         case 1:
6456                 return 2;
6457         case 2:
6458                 return 4;
6459         case 3:
6460                 return 8;
6461         case 4:
6462                 return 3;
6463         case 5:
6464                 return 6;
6465         case 6:
6466                 return 10;
6467         case 7:
6468                 return 12;
6469         case 8:
6470                 return 16;
6471         }
6472 }
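
/*
 * Example decode (hypothetical register value): a NOOFCHAN field of 3
 * means 8 DRAM channels; at 4 bytes per channel that is a 32 byte
 * effective bus width in the bandwidth helpers below.
 */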
6473
6474 struct dce8_wm_params {
6475         u32 dram_channels; /* number of dram channels */
6476         u32 yclk;          /* bandwidth per dram data pin in kHz */
6477         u32 sclk;          /* engine clock in kHz */
6478         u32 disp_clk;      /* display clock in kHz */
6479         u32 src_width;     /* viewport width */
6480         u32 active_time;   /* active display time in ns */
6481         u32 blank_time;    /* blank time in ns */
6482         bool interlaced;   /* mode is interlaced */
6483         fixed20_12 vsc;    /* vertical scale ratio */
6484         u32 num_heads;     /* number of active crtcs */
6485         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6486         u32 lb_size;       /* line buffer allocated to pipe */
6487         u32 vtaps;         /* vertical scaler taps */
6488 };
6489
6490 /**
6491  * dce8_dram_bandwidth - get the dram bandwidth
6492  *
6493  * @wm: watermark calculation data
6494  *
6495  * Calculate the raw dram bandwidth (CIK).
6496  * Used for display watermark bandwidth calculations
6497  * Returns the dram bandwidth in MBytes/s
6498  */
6499 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6500 {
6501         /* Calculate raw DRAM Bandwidth */
6502         fixed20_12 dram_efficiency; /* 0.7 */
6503         fixed20_12 yclk, dram_channels, bandwidth;
6504         fixed20_12 a;
6505
6506         a.full = dfixed_const(1000);
6507         yclk.full = dfixed_const(wm->yclk);
6508         yclk.full = dfixed_div(yclk, a);
6509         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6510         a.full = dfixed_const(10);
6511         dram_efficiency.full = dfixed_const(7);
6512         dram_efficiency.full = dfixed_div(dram_efficiency, a);
6513         bandwidth.full = dfixed_mul(dram_channels, yclk);
6514         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6515
6516         return dfixed_trunc(bandwidth);
6517 }
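
/*
 * Worked example (hypothetical numbers, not from any particular ASIC):
 * with wm->yclk = 1000000 (1 GHz effective per pin) and 8 DRAM
 * channels of 4 bytes each, the fixed-point math above computes
 * (1000000 / 1000) * (8 * 4) * 0.7 = 22400 MBytes/s of raw DRAM
 * bandwidth.
 */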
6518
6519 /**
6520  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6521  *
6522  * @wm: watermark calculation data
6523  *
6524  * Calculate the dram bandwidth used for display (CIK).
6525  * Used for display watermark bandwidth calculations
6526  * Returns the dram bandwidth for display in MBytes/s
6527  */
6528 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6529 {
6530         /* Calculate DRAM Bandwidth and the part allocated to display. */
6531         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6532         fixed20_12 yclk, dram_channels, bandwidth;
6533         fixed20_12 a;
6534
6535         a.full = dfixed_const(1000);
6536         yclk.full = dfixed_const(wm->yclk);
6537         yclk.full = dfixed_div(yclk, a);
6538         dram_channels.full = dfixed_const(wm->dram_channels * 4);
6539         a.full = dfixed_const(10);
6540         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6541         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6542         bandwidth.full = dfixed_mul(dram_channels, yclk);
6543         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6544
6545         return dfixed_trunc(bandwidth);
6546 }
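
/*
 * Worked example (same hypothetical numbers as above): with the worst
 * case 0.3 display allocation this becomes
 * (1000000 / 1000) * (8 * 4) * 0.3 = 9600 MBytes/s of DRAM bandwidth
 * assumed usable by the display.
 */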
6547
6548 /**
6549  * dce8_data_return_bandwidth - get the data return bandwidth
6550  *
6551  * @wm: watermark calculation data
6552  *
6553  * Calculate the data return bandwidth used for display (CIK).
6554  * Used for display watermark bandwidth calculations
6555  * Returns the data return bandwidth in MBytes/s
6556  */
6557 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6558 {
6559         /* Calculate the display Data return Bandwidth */
6560         fixed20_12 return_efficiency; /* 0.8 */
6561         fixed20_12 sclk, bandwidth;
6562         fixed20_12 a;
6563
6564         a.full = dfixed_const(1000);
6565         sclk.full = dfixed_const(wm->sclk);
6566         sclk.full = dfixed_div(sclk, a);
6567         a.full = dfixed_const(10);
6568         return_efficiency.full = dfixed_const(8);
6569         return_efficiency.full = dfixed_div(return_efficiency, a);
6570         a.full = dfixed_const(32);
6571         bandwidth.full = dfixed_mul(a, sclk);
6572         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6573
6574         return dfixed_trunc(bandwidth);
6575 }
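
/*
 * Worked example (hypothetical wm->sclk = 800000, i.e. 800 MHz): the
 * return path moves 32 bytes per sclk at 0.8 efficiency, so
 * (800000 / 1000) * 32 * 0.8 = 20480 MBytes/s.
 */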
6576
6577 /**
6578  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6579  *
6580  * @wm: watermark calculation data
6581  *
6582  * Calculate the dmif bandwidth used for display (CIK).
6583  * Used for display watermark bandwidth calculations
6584  * Returns the dmif bandwidth in MBytes/s
6585  */
6586 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6587 {
6588         /* Calculate the DMIF Request Bandwidth */
6589         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6590         fixed20_12 disp_clk, bandwidth;
6591         fixed20_12 a, b;
6592
6593         a.full = dfixed_const(1000);
6594         disp_clk.full = dfixed_const(wm->disp_clk);
6595         disp_clk.full = dfixed_div(disp_clk, a);
6596         a.full = dfixed_const(32);
6597         b.full = dfixed_mul(a, disp_clk);
6598
6599         a.full = dfixed_const(10);
6600         disp_clk_request_efficiency.full = dfixed_const(8);
6601         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6602
6603         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6604
6605         return dfixed_trunc(bandwidth);
6606 }
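
/*
 * Worked example (hypothetical wm->disp_clk = 300000, i.e. 300 MHz):
 * the DMIF can request 32 bytes per display clock at 0.8 efficiency,
 * so (300000 / 1000) * 32 * 0.8 = 7680 MBytes/s.
 */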
6607
6608 /**
6609  * dce8_available_bandwidth - get the min available bandwidth
6610  *
6611  * @wm: watermark calculation data
6612  *
6613  * Calculate the min available bandwidth used for display (CIK).
6614  * Used for display watermark bandwidth calculations
6615  * Returns the min available bandwidth in MBytes/s
6616  */
6617 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6618 {
6619         /* Calculate the available bandwidth. The display can use this temporarily but not on average. */
6620         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6621         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6622         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6623
6624         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6625 }
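
/*
 * Continuing the hypothetical numbers from the helpers above:
 * min(22400, min(20480, 7680)) = 7680 MBytes/s, i.e. the DMIF request
 * rate would be the bottleneck for that configuration.
 */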
6626
6627 /**
6628  * dce8_average_bandwidth - get the average available bandwidth
6629  *
6630  * @wm: watermark calculation data
6631  *
6632  * Calculate the average available bandwidth used for display (CIK).
6633  * Used for display watermark bandwidth calculations
6634  * Returns the average available bandwidth in MBytes/s
6635  */
6636 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6637 {
6638         /* Calculate the display mode Average Bandwidth
6639          * DisplayMode should contain the source and destination dimensions,
6640          * timing, etc.
6641          */
6642         fixed20_12 bpp;
6643         fixed20_12 line_time;
6644         fixed20_12 src_width;
6645         fixed20_12 bandwidth;
6646         fixed20_12 a;
6647
6648         a.full = dfixed_const(1000);
6649         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6650         line_time.full = dfixed_div(line_time, a);
6651         bpp.full = dfixed_const(wm->bytes_per_pixel);
6652         src_width.full = dfixed_const(wm->src_width);
6653         bandwidth.full = dfixed_mul(src_width, bpp);
6654         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6655         bandwidth.full = dfixed_div(bandwidth, line_time);
6656
6657         return dfixed_trunc(bandwidth);
6658 }
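
/*
 * Worked example (hypothetical 1920 pixel wide surface, 4 bytes per
 * pixel, vsc = 1.0, active_time + blank_time = 12800 ns): the line
 * time is 12.8 us, so the mode streams 1920 * 4 / 12.8 = 600 MBytes/s
 * on average.
 */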
6659
6660 /**
6661  * dce8_latency_watermark - get the latency watermark
6662  *
6663  * @wm: watermark calculation data
6664  *
6665  * Calculate the latency watermark (CIK).
6666  * Used for display watermark bandwidth calculations
6667  * Returns the latency watermark in ns
6668  */
6669 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6670 {
6671         /* First calculate the latency in ns */
6672         u32 mc_latency = 2000; /* 2000 ns. */
6673         u32 available_bandwidth = dce8_available_bandwidth(wm);
6674         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6675         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6676         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6677         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6678                 (wm->num_heads * cursor_line_pair_return_time);
6679         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6680         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6681         u32 tmp, dmif_size = 12288;
6682         fixed20_12 a, b, c;
6683
6684         if (wm->num_heads == 0)
6685                 return 0;
6686
6687         a.full = dfixed_const(2);
6688         b.full = dfixed_const(1);
6689         if ((wm->vsc.full > a.full) ||
6690             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6691             (wm->vtaps >= 5) ||
6692             ((wm->vsc.full >= a.full) && wm->interlaced))
6693                 max_src_lines_per_dst_line = 4;
6694         else
6695                 max_src_lines_per_dst_line = 2;
6696
6697         a.full = dfixed_const(available_bandwidth);
6698         b.full = dfixed_const(wm->num_heads);
6699         a.full = dfixed_div(a, b);
6700
6701         b.full = dfixed_const(mc_latency + 512);
6702         c.full = dfixed_const(wm->disp_clk);
6703         b.full = dfixed_div(b, c);
6704
6705         c.full = dfixed_const(dmif_size);
6706         b.full = dfixed_div(c, b);
6707
6708         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6709
6710         b.full = dfixed_const(1000);
6711         c.full = dfixed_const(wm->disp_clk);
6712         b.full = dfixed_div(c, b);
6713         c.full = dfixed_const(wm->bytes_per_pixel);
6714         b.full = dfixed_mul(b, c);
6715
6716         lb_fill_bw = min(tmp, dfixed_trunc(b));
6717
6718         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6719         b.full = dfixed_const(1000);
6720         c.full = dfixed_const(lb_fill_bw);
6721         b.full = dfixed_div(c, b);
6722         a.full = dfixed_div(a, b);
6723         line_fill_time = dfixed_trunc(a);
6724
6725         if (line_fill_time < wm->active_time)
6726                 return latency;
6727         else
6728                 return latency + (line_fill_time - wm->active_time);
6730 }
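
/*
 * Worked example (continuing the hypothetical 7680 MBytes/s available
 * bandwidth with one head and disp_clk = 300000): the worst chunk
 * return time is 512 * 8 * 1000 / 7680 = 533 ns, the cursor line pair
 * return time is 128 * 4 * 1000 / 7680 = 66 ns and the dc pipe
 * latency is 40000000 / 300000 = 133 ns, giving a base latency of
 * 2000 + (2 * 533 + 66) + 133 = 3265 ns before the line fill time
 * adjustment.
 */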
6731
6732 /**
6733  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6734  * average and available dram bandwidth
6735  *
6736  * @wm: watermark calculation data
6737  *
6738  * Check if the display average bandwidth fits in the display
6739  * dram bandwidth (CIK).
6740  * Used for display watermark bandwidth calculations
6741  * Returns true if the display fits, false if not.
6742  */
6743 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6744 {
6745         if (dce8_average_bandwidth(wm) <=
6746             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6747                 return true;
6748         else
6749                 return false;
6750 }
6751
6752 /**
6753  * dce8_average_bandwidth_vs_available_bandwidth - check
6754  * average and available bandwidth
6755  *
6756  * @wm: watermark calculation data
6757  *
6758  * Check if the display average bandwidth fits in the display
6759  * available bandwidth (CIK).
6760  * Used for display watermark bandwidth calculations
6761  * Returns true if the display fits, false if not.
6762  */
6763 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6764 {
6765         if (dce8_average_bandwidth(wm) <=
6766             (dce8_available_bandwidth(wm) / wm->num_heads))
6767                 return true;
6768         else
6769                 return false;
6770 }
6771
6772 /**
6773  * dce8_check_latency_hiding - check latency hiding
6774  *
6775  * @wm: watermark calculation data
6776  *
6777  * Check latency hiding (CIK).
6778  * Used for display watermark bandwidth calculations
6779  * Returns true if the display fits, false if not.
6780  */
6781 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6782 {
6783         u32 lb_partitions = wm->lb_size / wm->src_width;
6784         u32 line_time = wm->active_time + wm->blank_time;
6785         u32 latency_tolerant_lines;
6786         u32 latency_hiding;
6787         fixed20_12 a;
6788
6789         a.full = dfixed_const(1);
6790         if (wm->vsc.full > a.full)
6791                 latency_tolerant_lines = 1;
6792         else {
6793                 if (lb_partitions <= (wm->vtaps + 1))
6794                         latency_tolerant_lines = 1;
6795                 else
6796                         latency_tolerant_lines = 2;
6797         }
6798
6799         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6800
6801         if (dce8_latency_watermark(wm) <= latency_hiding)
6802                 return true;
6803         else
6804                 return false;
6805 }
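
/*
 * Worked example (hypothetical: lb_size = 5120, src_width = 1920,
 * vtaps = 1, vsc <= 1): lb_partitions = 5120 / 1920 = 2, which is
 * <= vtaps + 1, so only one latency tolerant line is assumed.  With
 * line_time = 13200 ns and blank_time = 1680 ns the display can hide
 * 13200 + 1680 = 14880 ns of latency, comfortably above the 3265 ns
 * watermark from the example above.
 */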
6806
6807 /**
6808  * dce8_program_watermarks - program display watermarks
6809  *
6810  * @rdev: radeon_device pointer
6811  * @radeon_crtc: the selected display controller
6812  * @lb_size: line buffer size
6813  * @num_heads: number of display controllers in use
6814  *
6815  * Calculate and program the display watermarks for the
6816  * selected display controller (CIK).
6817  */
6818 static void dce8_program_watermarks(struct radeon_device *rdev,
6819                                     struct radeon_crtc *radeon_crtc,
6820                                     u32 lb_size, u32 num_heads)
6821 {
6822         struct drm_display_mode *mode = &radeon_crtc->base.mode;
6823         struct dce8_wm_params wm;
6824         u32 pixel_period;
6825         u32 line_time = 0;
6826         u32 latency_watermark_a = 0, latency_watermark_b = 0;
6827         u32 tmp, wm_mask;
6828
6829         if (radeon_crtc->base.enabled && num_heads && mode) {
6830                 pixel_period = 1000000 / (u32)mode->clock;
6831                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6832
6833                 wm.yclk = rdev->pm.current_mclk * 10;
6834                 wm.sclk = rdev->pm.current_sclk * 10;
6835                 wm.disp_clk = mode->clock;
6836                 wm.src_width = mode->crtc_hdisplay;
6837                 wm.active_time = mode->crtc_hdisplay * pixel_period;
6838                 wm.blank_time = line_time - wm.active_time;
6839                 wm.interlaced = false;
6840                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6841                         wm.interlaced = true;
6842                 wm.vsc = radeon_crtc->vsc;
6843                 wm.vtaps = 1;
6844                 if (radeon_crtc->rmx_type != RMX_OFF)
6845                         wm.vtaps = 2;
6846                 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6847                 wm.lb_size = lb_size;
6848                 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6849                 wm.num_heads = num_heads;
6850
6851                 /* set for high clocks */
6852                 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6853                 /* set for low clocks */
6854                 /* XXX: wm.yclk/wm.sclk should be set to the low clock values here */
6855                 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6856
6857                 /* possibly force display priority to high */
6858                 /* should really do this at mode validation time... */
6859                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6860                     !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6861                     !dce8_check_latency_hiding(&wm) ||
6862                     (rdev->disp_priority == 2)) {
6863                         DRM_DEBUG_KMS("force priority to high\n");
6864                 }
6865         }
6866
6867         /* select wm A */
6868         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6869         tmp = wm_mask;
6870         tmp &= ~LATENCY_WATERMARK_MASK(3);
6871         tmp |= LATENCY_WATERMARK_MASK(1);
6872         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6873         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6874                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6875                 LATENCY_HIGH_WATERMARK(line_time)));
6876         /* select wm B */
6877         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6878         tmp &= ~LATENCY_WATERMARK_MASK(3);
6879         tmp |= LATENCY_WATERMARK_MASK(2);
6880         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6881         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6882                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6883                 LATENCY_HIGH_WATERMARK(line_time)));
6884         /* restore original selection */
6885         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6886 }
6887
6888 /**
6889  * dce8_bandwidth_update - program display watermarks
6890  *
6891  * @rdev: radeon_device pointer
6892  *
6893  * Calculate and program the display watermarks and line
6894  * buffer allocation (CIK).
6895  */
6896 void dce8_bandwidth_update(struct radeon_device *rdev)
6897 {
6898         struct drm_display_mode *mode = NULL;
6899         u32 num_heads = 0, lb_size;
6900         int i;
6901
6902         radeon_update_display_priority(rdev);
6903
6904         for (i = 0; i < rdev->num_crtc; i++) {
6905                 if (rdev->mode_info.crtcs[i]->base.enabled)
6906                         num_heads++;
6907         }
6908         for (i = 0; i < rdev->num_crtc; i++) {
6909                 mode = &rdev->mode_info.crtcs[i]->base.mode;
6910                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6911                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6912         }
6913 }
6914
6915 /**
6916  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6917  *
6918  * @rdev: radeon_device pointer
6919  *
6920  * Fetches a GPU clock counter snapshot (CIK).
6921  * Returns the 64 bit clock counter snapshot.
6922  */
6923 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6924 {
6925         uint64_t clock;
6926
6927         spin_lock(&rdev->gpu_clock_mutex);
6928         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6929         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6930                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6931         spin_unlock(&rdev->gpu_clock_mutex);
6932         return clock;
6933 }
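
#if 0
/*
 * Hypothetical usage sketch (not part of the driver): sample the free
 * running counter twice to measure how many GPU clocks elapsed around
 * some operation.
 */
static uint64_t cik_measure_gpu_clocks(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);

	/* ... submit and wait for some work here ... */

	return cik_get_gpu_clock_counter(rdev) - start;
}
#endif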
6934
6935 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6936                               u32 cntl_reg, u32 status_reg)
6937 {
6938         int r, i;
6939         struct atom_clock_dividers dividers;
6940         uint32_t tmp;
6941
6942         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6943                                            clock, false, &dividers);
6944         if (r)
6945                 return r;
6946
6947         tmp = RREG32_SMC(cntl_reg);
6948         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6949         tmp |= dividers.post_divider;
6950         WREG32_SMC(cntl_reg, tmp);
6951
6952         for (i = 0; i < 100; i++) {
6953                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6954                         break;
6955                 DRM_MDELAY(10);
6956         }
6957         if (i == 100)
6958                 return -ETIMEDOUT;
6959
6960         return 0;
6961 }
6962
6963 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6964 {
6965         int r = 0;
6966
6967         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6968         if (r)
6969                 return r;
6970
6971         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6972         return r;
6973 }
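
/*
 * Hypothetical usage sketch: a caller picks a VCLK/DCLK pair for the
 * current workload, e.g.
 *
 *	r = cik_set_uvd_clocks(rdev, 53300, 40000);
 *
 * (the 53300/40000 kHz values are illustrative only) and treats a
 * non-zero return as "leave the clocks alone".
 */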
6974
6975 int cik_uvd_resume(struct radeon_device *rdev)
6976 {
6977         uint64_t addr;
6978         uint32_t size;
6979         int r;
6980
6981         r = radeon_uvd_resume(rdev);
6982         if (r)
6983                 return r;
6984
6985         /* program the VCPU memory controller bits 0-27 */
6986         addr = rdev->uvd.gpu_addr >> 3;
6987         size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3;
6988         WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6989         WREG32(UVD_VCPU_CACHE_SIZE0, size);
6990
6991         addr += size;
6992         size = RADEON_UVD_STACK_SIZE >> 3;
6993         WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6994         WREG32(UVD_VCPU_CACHE_SIZE1, size);
6995
6996         addr += size;
6997         size = RADEON_UVD_HEAP_SIZE >> 3;
6998         WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6999         WREG32(UVD_VCPU_CACHE_SIZE2, size);
7000
7001         /* bits 28-31 */
7002         addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7003         WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7004
7005         /* bits 32-39 */
7006         addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7007         WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7008
7009         return 0;
7010 }
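
/*
 * Sketch of the VCPU cache layout programmed above, in 8-byte units
 * starting at rdev->uvd.gpu_addr (sizes depend on the firmware image
 * and the RADEON_UVD_STACK_SIZE/RADEON_UVD_HEAP_SIZE constants):
 *
 *	OFFSET0/SIZE0: firmware image
 *	OFFSET1/SIZE1: VCPU stack
 *	OFFSET2/SIZE2: VCPU heap
 *
 * with UVD_LMI_ADDR_EXT and UVD_LMI_EXT40_ADDR carrying address bits
 * 28-31 and 32-39 respectively.
 */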