/* sys/dev/drm/radeon/cik.c (dragonfly.git) */
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
        u32 r;

        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        r = RREG32(PCIE_DATA);
        return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
        WREG32(PCIE_INDEX, reg);
        (void)RREG32(PCIE_INDEX);
        WREG32(PCIE_DATA, v);
        (void)RREG32(PCIE_DATA);
}
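
/*
 * Usage sketch (illustrative only): a field in a PCIE port register
 * would be updated with a read-modify-write through the accessors
 * above.  The register and field names here are placeholders, not
 * real cikd.h symbols:
 *
 *      u32 tmp = cik_pciep_rreg(rdev, SOME_PCIE_PORT_REG);
 *      tmp &= ~SOME_FIELD_MASK;
 *      tmp |= SOME_FIELD(v);
 *      cik_pciep_wreg(rdev, SOME_PCIE_PORT_REG, tmp);
 *
 * The dummy read-back of PCIE_INDEX in both accessors posts the
 * index write before the data register is touched.
 */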
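
/*
 * The golden/init tables below are flat lists of {offset, and_mask,
 * or_mask} u32 triplets consumed by radeon_program_register_sequence().
 * The helper below is only a sketch documenting those semantics; the
 * real implementation lives elsewhere in the driver (shown here under
 * the assumption that full-mask entries are plain writes).
 */
static __unused void cik_apply_register_triplets(struct radeon_device *rdev,
                                                 const u32 *table, u32 size)
{
        u32 i, reg, and_mask, or_mask, tmp;

        for (i = 0; i < size; i += 3) {
                reg = table[i + 0];
                and_mask = table[i + 1];
                or_mask = table[i + 2];
                if (and_mask == 0xffffffff) {
                        tmp = or_mask;  /* full-mask entry: plain write */
                } else {
                        tmp = RREG32(reg);
                        tmp &= ~and_mask;
                        tmp |= or_mask;
                }
                WREG32(reg, tmp);
        }
}
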
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28355, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_BONAIRE:
                radeon_program_register_sequence(rdev,
                                                 bonaire_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 bonaire_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
                break;
        case CHIP_KABINI:
                radeon_program_register_sequence(rdev,
                                                 kalindi_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 kalindi_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
                break;
        case CHIP_KAVERI:
                radeon_program_register_sequence(rdev,
                                                 spectre_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_common_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
                radeon_program_register_sequence(rdev,
                                                 spectre_golden_spm_registers,
                                                 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
                break;
        default:
                break;
        }
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;

        if (rdev->flags & RADEON_IS_IGP) {
                if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
                        return reference_clock / 2;
        } else {
                if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
                        return reference_clock / 4;
        }
        return reference_clock;
}
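
/*
 * Illustrative helper (a sketch, not part of the driver): radeon
 * reports clocks in 10 kHz units, so a tick count on the reference
 * clock converts to microseconds roughly as below (truncating, and
 * assuming a non-zero xclk).
 */
static __unused u32 cik_xclk_ticks_to_usecs(struct radeon_device *rdev, u32 ticks)
{
        u32 xclk = cik_get_xclk(rdev);  /* 10 kHz units */

        /* ticks / (xclk * 10^4 Hz) seconds == (ticks * 100) / xclk usecs */
        return (ticks * 100) / xclk;
}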

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
        if (offset < rdev->doorbell.size) {
                return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
                return 0;
        }
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
        if (offset < rdev->doorbell.size) {
                writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
        }
}
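
/*
 * Typical use (sketch): a compute queue submits work by writing its
 * write pointer through the doorbell aperture, e.g.
 *
 *      cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *
 * where ring->doorbell_offset is the byte slot assigned to that
 * queue (field name shown for illustration only).
 */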

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
                            u32 me, u32 pipe, u32 queue, u32 vmid)
{
        u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
                             MEID(me & 0x3) |
                             VMID(vmid & 0xf) |
                             QUEUEID(queue & 0x7));
        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
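
/*
 * Typical pattern (sketch): callers serialize on rdev->srbm_mutex,
 * point the SRBM at one me/pipe/queue/vmid instance, program the
 * per-instance registers, then restore the default instance:
 *
 *      mutex_lock(&rdev->srbm_mutex);
 *      cik_srbm_select(rdev, me, pipe, queue, 0);
 *      ... program per-queue registers ...
 *      cik_srbm_select(rdev, 0, 0, 0, 0);
 *      mutex_unlock(&rdev->srbm_mutex);
 *
 * (srbm_mutex named as in the radeon driver; shown for illustration.)
 */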

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static __unused int ci_mc_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        u32 running;
        u32 *io_mc_regs;
        int i, ucode_size, regs_size;

        if (!rdev->mc_fw)
                return -EINVAL;

        switch (rdev->family) {
        case CHIP_BONAIRE:
        default:
                io_mc_regs = (u32 *)&bonaire_io_mc_regs;
                ucode_size = CIK_MC_UCODE_SIZE;
                regs_size = BONAIRE_IO_MC_REGS_SIZE;
                break;
        }

        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

        if (running == 0) {
                /* reset the engine and set to writable */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

                /* load mc io regs */
                for (i = 0; i < regs_size; i++) {
                        WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
                        WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
                }
                /* load the MC ucode */
                fw_data = (const __be32 *)rdev->mc_fw->data;
                for (i = 0; i < ucode_size; i++)
                        WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

                /* put the engine back into the active state */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

                /* wait for training to complete */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
                                break;
                        udelay(1);
                }
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
                                break;
                        udelay(1);
                }
        }

        return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
        const char *chip_name;
        size_t pfp_req_size, me_req_size, ce_req_size,
                mec_req_size, rlc_req_size, mc_req_size,
                sdma_req_size;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        switch (rdev->family) {
        case CHIP_BONAIRE:
                chip_name = "BONAIRE";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
                mc_req_size = CIK_MC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KAVERI:
                chip_name = "KAVERI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KV_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        case CHIP_KABINI:
                chip_name = "KABINI";
                pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
                me_req_size = CIK_ME_UCODE_SIZE * 4;
                ce_req_size = CIK_CE_UCODE_SIZE * 4;
                mec_req_size = CIK_MEC_UCODE_SIZE * 4;
                rlc_req_size = KB_RLC_UCODE_SIZE * 4;
                sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
                break;
        default: BUG();
        }

        DRM_INFO("Loading %s Microcode\n", chip_name);

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
        err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->pfp_fw->datasize != pfp_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->datasize, fw_name);
                err = -EINVAL;
                goto out;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
        err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->me_fw->datasize != me_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
        err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->ce_fw->datasize != ce_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->ce_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
        err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->mec_fw->datasize != mec_req_size) {
                printk(KERN_ERR
                       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->mec_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
        err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->rlc_fw->datasize != rlc_req_size) {
                printk(KERN_ERR
                       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->rlc_fw->datasize, fw_name);
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
        err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
        if (err)
                goto out;
        if (rdev->sdma_fw->datasize != sdma_req_size) {
                printk(KERN_ERR
                       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
                       rdev->sdma_fw->datasize, fw_name);
                err = -EINVAL;
        }

        /* No MC ucode on APUs */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
                err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
                if (err)
                        goto out;
                if (rdev->mc_fw->datasize != mc_req_size) {
                        printk(KERN_ERR
                               "cik_mc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->mc_fw->datasize, fw_name);
                        err = -EINVAL;
                }
        }

out:
        if (err) {
                if (err != -EINVAL)
                        printk(KERN_ERR
                               "cik_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                release_firmware(rdev->pfp_fw);
                rdev->pfp_fw = NULL;
                release_firmware(rdev->me_fw);
                rdev->me_fw = NULL;
                release_firmware(rdev->ce_fw);
                rdev->ce_fw = NULL;
                release_firmware(rdev->mec_fw);
                rdev->mec_fw = NULL;
                release_firmware(rdev->rlc_fw);
                rdev->rlc_fw = NULL;
                release_firmware(rdev->sdma_fw);
                rdev->sdma_fw = NULL;
                release_firmware(rdev->mc_fw);
                rdev->mc_fw = NULL;
        }
        return err;
}
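
/*
 * Illustrative call site (a sketch, not the driver's actual init
 * path): ASIC startup code would typically fetch the images once and
 * bail out early on failure:
 *
 *      if (!rdev->mec_fw || !rdev->pfp_fw || !rdev->me_fw ||
 *          !rdev->ce_fw || !rdev->rlc_fw || !rdev->sdma_fw) {
 *              r = cik_init_microcode(rdev);
 *              if (r) {
 *                      DRM_ERROR("Failed to load firmware!\n");
 *                      return r;
 *              }
 *      }
 */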

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
        const u32 num_tile_mode_states = 32;
        const u32 num_secondary_tile_mode_states = 16;
        u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
        u32 num_pipe_configs;
        u32 num_rbs = rdev->config.cik.max_backends_per_se *
                rdev->config.cik.max_shader_engines;

        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
                break;
        case 2:
        default:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
                break;
        case 4:
                split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
                break;
        }

        num_pipe_configs = rdev->config.cik.max_tile_pipes;
        if (num_pipe_configs > 8)
                num_pipe_configs = 8; /* ??? */

        if (num_pipe_configs == 8) {
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                break;
                        case 1:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                break;
                        case 2:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 3:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                break;
                        case 4:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 5:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                break;
                        case 6:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                break;
                        case 7:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 TILE_SPLIT(split_equal_to_row_size));
                                break;
                        case 8:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
                                break;
                        case 9:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                break;
                        case 10:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 11:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 12:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 13:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
                                break;
                        case 14:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 16:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 17:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 27:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
                                break;
                        case 28:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 29:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        case 30:
                                gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
                                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
                        WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
                for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
                        switch (reg_offset) {
                        case 0:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 1:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 2:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 3:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 4:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 5:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 6:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        case 8:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 9:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 10:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 11:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                                                 NUM_BANKS(ADDR_SURF_16_BANK));
                                break;
                        case 12:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_8_BANK));
                                break;
                        case 13:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_4_BANK));
                                break;
                        case 14:
                                gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                                                 NUM_BANKS(ADDR_SURF_2_BANK));
                                break;
                        default:
                                gb_tile_moden = 0;
                                break;
                        }
                        WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
                }
        } else if (num_pipe_configs == 4) {
                if (num_rbs == 4) {
                        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
                                switch (reg_offset) {
                                case 0:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
                                        break;
                                case 1:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
                                        break;
                                case 2:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 3:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
                                        break;
                                case 4:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 5:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
                                        break;
                                case 6:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
                                        break;
                                case 7:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         TILE_SPLIT(split_equal_to_row_size));
                                        break;
                                case 8:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16));
                                        break;
                                case 9:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
                                        break;
                                case 10:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 11:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 12:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                                                         PIPE_CONFIG(ADDR_SURF_P4_16x16) |
                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                                        break;
                                case 13:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
                                        break;
                                case 14:
                                        gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1224                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1225                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1226                                         break;
1227                                 case 16:
1228                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1229                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1230                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1231                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1232                                         break;
1233                                 case 17:
1234                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1235                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1236                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1237                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1238                                         break;
1239                                 case 27:
1240                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1242                                         break;
1243                                 case 28:
1244                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1245                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1246                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1247                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1248                                         break;
1249                                 case 29:
1250                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1252                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1253                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1254                                         break;
1255                                 case 30:
1256                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1257                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1258                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1259                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1260                                         break;
1261                                 default:
1262                                         gb_tile_moden = 0;
1263                                         break;
1264                                 }
1265                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1266                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1267                         }
1268                 } else if (num_rbs < 4) {
1269                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1270                                 switch (reg_offset) {
1271                                 case 0:
1272                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1274                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1276                                         break;
1277                                 case 1:
1278                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1279                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1280                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1281                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1282                                         break;
1283                                 case 2:
1284                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1285                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1286                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1287                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1288                                         break;
1289                                 case 3:
1290                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1291                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1292                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1293                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1294                                         break;
1295                                 case 4:
1296                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1297                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1298                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1299                                                          TILE_SPLIT(split_equal_to_row_size));
1300                                         break;
1301                                 case 5:
1302                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1303                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1304                                         break;
1305                                 case 6:
1306                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1307                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1310                                         break;
1311                                 case 7:
1312                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1313                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315                                                          TILE_SPLIT(split_equal_to_row_size));
1316                                         break;
1317                                 case 8:
1318                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1319                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
1320                                         break;
1321                                 case 9:
1322                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1323                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1324                                         break;
1325                                 case 10:
1326                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1327                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1328                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1329                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1330                                         break;
1331                                 case 11:
1332                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1334                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1335                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1336                                         break;
1337                                 case 12:
1338                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1339                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1340                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1341                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1342                                         break;
1343                                 case 13:
1344                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1345                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1346                                         break;
1347                                 case 14:
1348                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1349                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1350                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1352                                         break;
1353                                 case 16:
1354                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1355                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1356                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1358                                         break;
1359                                 case 17:
1360                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1361                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1362                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1363                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1364                                         break;
1365                                 case 27:
1366                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1367                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1368                                         break;
1369                                 case 28:
1370                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1372                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374                                         break;
1375                                 case 29:
1376                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1377                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1378                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380                                         break;
1381                                 case 30:
1382                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1383                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1384                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1385                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1386                                         break;
1387                                 default:
1388                                         gb_tile_moden = 0;
1389                                         break;
1390                                 }
1391                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1392                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1393                         }
1394                 }
1395                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1396                         switch (reg_offset) {
1397                         case 0:
1398                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1399                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1400                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1401                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1402                                 break;
1403                         case 1:
1404                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1407                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1408                                 break;
1409                         case 2:
1410                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1411                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1412                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1413                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1414                                 break;
1415                         case 3:
1416                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1417                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1418                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1419                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1420                                 break;
1421                         case 4:
1422                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1423                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1424                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1425                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1426                                 break;
1427                         case 5:
1428                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1429                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1430                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1431                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1432                                 break;
1433                         case 6:
1434                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1437                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1438                                 break;
1439                         case 8:
1440                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1441                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1442                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1443                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1444                                 break;
1445                         case 9:
1446                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1447                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1448                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1449                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1450                                 break;
1451                         case 10:
1452                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1453                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1454                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1455                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1456                                 break;
1457                         case 11:
1458                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1459                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1460                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1461                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1462                                 break;
1463                         case 12:
1464                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1465                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1466                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1467                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1468                                 break;
1469                         case 13:
1470                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1471                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1472                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1473                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1474                                 break;
1475                         case 14:
1476                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1477                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1478                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1479                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1480                                 break;
1481                         default:
1482                                 gb_tile_moden = 0;
1483                                 break;
1484                         }
1485                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1486                 }
1487         } else if (num_pipe_configs == 2) {
1488                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1489                         switch (reg_offset) {
1490                         case 0:
1491                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1492                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1493                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1494                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1495                                 break;
1496                         case 1:
1497                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1498                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1499                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1500                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1501                                 break;
1502                         case 2:
1503                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1504                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1505                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1506                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1507                                 break;
1508                         case 3:
1509                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1510                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1511                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1512                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1513                                 break;
1514                         case 4:
1515                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1516                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1517                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1518                                                  TILE_SPLIT(split_equal_to_row_size));
1519                                 break;
1520                         case 5:
1521                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1522                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1523                                 break;
1524                         case 6:
1525                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1526                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1528                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1529                                 break;
1530                         case 7:
1531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1532                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1534                                                  TILE_SPLIT(split_equal_to_row_size));
1535                                 break;
1536                         case 8:
1537                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1538                                 break;
1539                         case 9:
1540                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1541                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1542                                 break;
1543                         case 10:
1544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1545                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1546                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1547                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1548                                 break;
1549                         case 11:
1550                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1551                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1552                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1553                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1554                                 break;
1555                         case 12:
1556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1558                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1559                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1560                                 break;
1561                         case 13:
1562                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1563                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1564                                 break;
1565                         case 14:
1566                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1567                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1568                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1569                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570                                 break;
1571                         case 16:
1572                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1573                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1574                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1575                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576                                 break;
1577                         case 17:
1578                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1579                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1580                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1581                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1582                                 break;
1583                         case 27:
1584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1585                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1586                                 break;
1587                         case 28:
1588                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1589                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1590                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1591                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1592                                 break;
1593                         case 29:
1594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1595                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1596                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1597                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1598                                 break;
1599                         case 30:
1600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1602                                                  PIPE_CONFIG(ADDR_SURF_P2) |
1603                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1604                                 break;
1605                         default:
1606                                 gb_tile_moden = 0;
1607                                 break;
1608                         }
1609                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1610                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1611                 }
1612                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1613                         switch (reg_offset) {
1614                         case 0:
1615                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1616                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1617                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1618                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1619                                 break;
1620                         case 1:
1621                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1622                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1623                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1624                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1625                                 break;
1626                         case 2:
1627                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1628                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1629                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1630                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1631                                 break;
1632                         case 3:
1633                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1634                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1635                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1636                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1637                                 break;
1638                         case 4:
1639                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1640                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1641                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1642                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1643                                 break;
1644                         case 5:
1645                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1646                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1647                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1648                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1649                                 break;
1650                         case 6:
1651                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1652                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1653                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1654                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1655                                 break;
1656                         case 8:
1657                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1658                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1659                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1660                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1661                                 break;
1662                         case 9:
1663                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1664                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1665                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1666                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1667                                 break;
1668                         case 10:
1669                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1670                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1671                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1672                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1673                                 break;
1674                         case 11:
1675                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1676                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1677                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1678                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1679                                 break;
1680                         case 12:
1681                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1682                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1683                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1684                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1685                                 break;
1686                         case 13:
1687                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1688                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1689                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1690                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1691                                 break;
1692                         case 14:
1693                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1694                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1695                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1696                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1697                                 break;
1698                         default:
1699                                 gb_tile_moden = 0;
1700                                 break;
1701                         }
1702                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1703                 }
1704         } else
1705                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1706 }
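
/* Note: each GB_TILE_MODE<n> value programmed above is also cached in
 * rdev->config.cik.tile_mode_array[], so the tiling table can be handed
 * out again (in the usual radeon design, via the info ioctl) without
 * re-reading hardware registers; the GB_MACROTILE_MODE<n> values are
 * only written to the hardware here.
 */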

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
                             u32 se_num, u32 sh_num)
{
        u32 data = INSTANCE_BROADCAST_WRITES;

        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
                data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
        else if (se_num == 0xffffffff)
                data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
        else if (sh_num == 0xffffffff)
                data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
        else
                data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
        WREG32(GRBM_GFX_INDEX, data);
}
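
/* Illustrative usage (a sketch mirroring cik_setup_rb() below; values
 * hypothetical): read an instanced register on SE 1 with SH broadcast,
 * then restore full broadcast so later writes hit every instance again:
 *
 *      cik_select_se_sh(rdev, 1, 0xffffffff);
 *      data = RREG32(CC_RB_BACKEND_DISABLE);
 *      cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */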

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
        u32 i, mask = 0;

        for (i = 0; i < bit_width; i++) {
                mask <<= 1;
                mask |= 1;
        }
        return mask;
}
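
/* Equivalent closed form (for 0 < bit_width < 32):
 *      cik_create_bitmask(n) == (1u << n) - 1, e.g. n = 4 -> 0xf.
 * The loop form also covers bit_width == 32 (yielding 0xffffffff)
 * without relying on an undefined 32-bit shift.
 */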

/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
                               u32 max_rb_num, u32 se_num,
                               u32 sh_per_se)
{
        u32 data, mask;

        data = RREG32(CC_RB_BACKEND_DISABLE);
        if (data & 1)
                data &= BACKEND_DISABLE_MASK;
        else
                data = 0;
        data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

        data >>= BACKEND_DISABLE_SHIFT;

        mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

        return data & mask;
}
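
/* Worked example (hypothetical numbers): with max_rb_num = 4, se_num = 2
 * and sh_per_se = 1, the mask is cik_create_bitmask(4 / 2 / 1) = 0x3, so
 * only the two low bits of the shifted disable field are returned for
 * the currently selected SE/SH.
 */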

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num)
{
        int i, j;
        u32 data, mask;
        u32 disabled_rbs = 0;
        u32 enabled_rbs = 0;

        for (i = 0; i < se_num; i++) {
                for (j = 0; j < sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
                        disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
                }
        }
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        mask = 1;
        for (i = 0; i < max_rb_num; i++) {
                if (!(disabled_rbs & mask))
                        enabled_rbs |= mask;
                mask <<= 1;
        }

        for (i = 0; i < se_num; i++) {
                cik_select_se_sh(rdev, i, 0xffffffff);
                data = 0;
                for (j = 0; j < sh_per_se; j++) {
                        switch (enabled_rbs & 3) {
                        case 1:
                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                                break;
                        case 2:
                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                                break;
                        case 3:
                        default:
                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                                break;
                        }
                        enabled_rbs >>= 2;
                }
                WREG32(PA_SC_RASTER_CONFIG, data);
        }
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
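
/* Worked example (hypothetical numbers, assuming CIK_RB_BITMAP_WIDTH_PER_SH
 * is 2 bits): with two SEs, one SH each and max_rb_num = 4, an SE1/SH0
 * disable report of 0x2 gives disabled_rbs = 0x8 and enabled_rbs = 0x7;
 * the last loop then consumes enabled_rbs two bits at a time to build
 * each SE's PA_SC_RASTER_CONFIG value.
 */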

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static __unused void cik_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        switch (rdev->family) {
        case CHIP_BONAIRE:
                rdev->config.cik.max_shader_engines = 2;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 7;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
                /* TODO */
                break;
        case CHIP_KABINI:
        default:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 2;
                rdev->config.cik.max_cu_per_sh = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 2;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
        rdev->config.cik.mem_max_burst_length_bytes = 256;
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cik.mem_row_size_in_kb > 4)
                rdev->config.cik.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cik.shader_engine_tile_size = 32;
        rdev->config.cik.num_gpus = 1;
        rdev->config.cik.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cik.tile_config = 0;
        switch (rdev->config.cik.num_tile_pipes) {
        case 1:
                rdev->config.cik.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cik.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cik.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.cik.tile_config |= (3 << 0);
                break;
        }
        if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
                rdev->config.cik.tile_config |= 1 << 4;
        else
                rdev->config.cik.tile_config |= 0 << 4;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
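
        /* Worked example: a Bonaire-like part with num_tile_pipes = 4 sets
         * bits 3:0 = 2; a multi-bank RAMCFG sets bits 7:4 = 1; bits 11:8
         * and 15:12 mirror the PIPE_INTERLEAVE_SIZE and ROW_SIZE fields of
         * gb_addr_config (e.g. a 4KB row, ROW_SIZE(2), puts 2 in bits 15:12).
         */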

        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

        cik_tiling_mode_table_init(rdev);

        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        WREG32(SX_DEBUG_1, 0x20);

        WREG32(TA_CNTL_AUX, 0x00010000);

        tmp = RREG32(SPI_CONFIG_CNTL);
        tmp |= 0x03000000;
        WREG32(SPI_CONFIG_CNTL, tmp);

        WREG32(SQ_CONFIG, 1);

        WREG32(DB_DEBUG, 0);

        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
        tmp |= 0x00000400;
        WREG32(DB_DEBUG2, tmp);

        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
        tmp |= 0x00020200;
        WREG32(DB_DEBUG3, tmp);

        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
        tmp |= 0x00018208;
        WREG32(CB_HW_CONTROL, tmp);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

        udelay(50);
}

/*
 * GPU scratch register helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static __unused void cik_scratch_init(struct radeon_device *rdev)
{
        int i;

        rdev->scratch.num_reg = 7;
        rdev->scratch.reg_base = SCRATCH_REG0;
        for (i = 0; i < rdev->scratch.num_reg; i++) {
                rdev->scratch.free[i] = true;
                rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
        }
}
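
/* Sketch of the intended use (see cik_ring_test() below): borrow a
 * register with radeon_scratch_get(), access it as a plain MMIO register
 * at reg_base + 4 * i, and return it with radeon_scratch_free().
 */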

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = radeon_scratch_get(rdev, &scratch);
        if (r) {
                DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = radeon_ring_lock(rdev, ring, 3);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
                radeon_scratch_free(rdev, scratch);
                return r;
        }
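        /* 3-dword test packet: SET_UCONFIG_REG header, the scratch
         * register's dword offset relative to PACKET3_SET_UCONFIG_REG_START,
         * then the value to store.  The loop below polls for that value.
         */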
2107         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2108         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2109         radeon_ring_write(ring, 0xDEADBEEF);
2110         radeon_ring_unlock_commit(rdev, ring);
2111
2112         for (i = 0; i < rdev->usec_timeout; i++) {
2113                 tmp = RREG32(scratch);
2114                 if (tmp == 0xDEADBEEF)
2115                         break;
2116                 DRM_UDELAY(1);
2117         }
2118         if (i < rdev->usec_timeout) {
2119                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2120         } else {
2121                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2122                           ring->idx, scratch, tmp);
2123                 r = -EINVAL;
2124         }
2125         radeon_scratch_free(rdev, scratch);
2126         return r;
2127 }
2128
2129 /**
2130  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2131  *
2132  * @rdev: radeon_device pointer
2133  * @fence: radeon fence object
2134  *
2135  * Emits a fence sequence number on the gfx ring and flushes
2136  * GPU caches.
2137  */
2138 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2139                              struct radeon_fence *fence)
2140 {
2141         struct radeon_ring *ring = &rdev->ring[fence->ring];
2142         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2143
2144         /* EVENT_WRITE_EOP - flush caches, send int */
2145         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2146         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2147                                  EOP_TC_ACTION_EN |
2148                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2149                                  EVENT_INDEX(5)));
2150         radeon_ring_write(ring, addr & 0xfffffffc);
2151         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2152         radeon_ring_write(ring, fence->seq);
2153         radeon_ring_write(ring, 0);
2154         /* HDP flush */
2155         /* We should be using the new WAIT_REG_MEM special op packet here
2156          * but it causes the CP to hang
2157          */
2158         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2159         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2160                                  WRITE_DATA_DST_SEL(0)));
2161         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2162         radeon_ring_write(ring, 0);
2163         radeon_ring_write(ring, 0);
2164 }
2165
2166 /**
2167  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2168  *
2169  * @rdev: radeon_device pointer
2170  * @fence: radeon fence object
2171  *
2172  * Emits a fence sequence number on the compute ring and flushes
2173  * GPU caches.
2174  */
2175 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2176                                  struct radeon_fence *fence)
2177 {
2178         struct radeon_ring *ring = &rdev->ring[fence->ring];
2179         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2180
2181         /* RELEASE_MEM - flush caches, send int */
2182         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2183         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2184                                  EOP_TC_ACTION_EN |
2185                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2186                                  EVENT_INDEX(5)));
2187         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2188         radeon_ring_write(ring, addr & 0xfffffffc);
2189         radeon_ring_write(ring, upper_32_bits(addr));
2190         radeon_ring_write(ring, fence->seq);
2191         radeon_ring_write(ring, 0);
2192         /* HDP flush */
2193         /* We should be using the new WAIT_REG_MEM special op packet here
2194          * but it causes the CP to hang
2195          */
2196         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2197         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2198                                  WRITE_DATA_DST_SEL(0)));
2199         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2200         radeon_ring_write(ring, 0);
2201         radeon_ring_write(ring, 0);
2202 }
2203
2204 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2205                              struct radeon_ring *ring,
2206                              struct radeon_semaphore *semaphore,
2207                              bool emit_wait)
2208 {
2209         uint64_t addr = semaphore->gpu_addr;
2210         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2211
2212         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2213         radeon_ring_write(ring, addr & 0xffffffff);
2214         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2215 }
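
/*
 * Illustrative sketch (not part of the driver): a MEM_SEMAPHORE packet pair
 * ordering two rings.  The producer emits a signal, the consumer emits a
 * wait on the same semaphore, so the consumer's CP stalls at the wait until
 * the producer has passed the signal point.  Both rings are assumed to be
 * locked by the caller.
 */
static __unused void cik_semaphore_usage_example(struct radeon_device *rdev,
                                                 struct radeon_semaphore *sem,
                                                 struct radeon_ring *producer,
                                                 struct radeon_ring *consumer)
{
        /* SEM_SEL_SIGNAL on the producer ring */
        cik_semaphore_ring_emit(rdev, producer, sem, false);
        /* SEM_SEL_WAIT on the consumer ring */
        cik_semaphore_ring_emit(rdev, consumer, sem, true);
}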
2216
2217 /*
2218  * IB stuff
2219  */
2220 /**
2221  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2222  *
2223  * @rdev: radeon_device pointer
2224  * @ib: radeon indirect buffer object
2225  *
2226  * Emits a DE (drawing engine) or CE (constant engine) IB
2227  * on the gfx ring.  IBs are usually generated by userspace
2228  * acceleration drivers and submitted to the kernel for
2229  * scheduling on the ring.  This function schedules the IB
2230  * on the gfx ring for execution by the GPU.
2231  */
2232 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2233 {
2234         struct radeon_ring *ring = &rdev->ring[ib->ring];
2235         u32 header, control = INDIRECT_BUFFER_VALID;
2236
2237         if (ib->is_const_ib) {
2238                 /* set switch buffer packet before const IB */
2239                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2240                 radeon_ring_write(ring, 0);
2241
2242                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2243         } else {
2244                 u32 next_rptr;
2245                 if (ring->rptr_save_reg) {
2246                         next_rptr = ring->wptr + 3 + 4;
2247                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2248                         radeon_ring_write(ring, ((ring->rptr_save_reg -
2249                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
2250                         radeon_ring_write(ring, next_rptr);
2251                 } else if (rdev->wb.enabled) {
2252                         next_rptr = ring->wptr + 5 + 4;
2253                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2254                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2255                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2256                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2257                         radeon_ring_write(ring, next_rptr);
2258                 }
2259
2260                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2261         }
2262
2263         control |= ib->length_dw |
2264                 (ib->vm ? (ib->vm->id << 24) : 0);
2265
2266         radeon_ring_write(ring, header);
2267         radeon_ring_write(ring,
2268 #ifdef __BIG_ENDIAN
2269                           (2 << 0) |
2270 #endif
2271                           (ib->gpu_addr & 0xFFFFFFFC));
2272         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2273         radeon_ring_write(ring, control);
2274 }
2275
2276 /**
2277  * cik_ib_test - basic gfx ring IB test
2278  *
2279  * @rdev: radeon_device pointer
2280  * @ring: radeon_ring structure holding ring information
2281  *
2282  * Allocate an IB and execute it on the gfx ring (CIK).
2283  * Provides a basic gfx ring test to verify that IBs are working.
2284  * Returns 0 on success, error on failure.
2285  */
2286 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2287 {
2288         struct radeon_ib ib;
2289         uint32_t scratch;
2290         uint32_t tmp = 0;
2291         unsigned i;
2292         int r;
2293
2294         r = radeon_scratch_get(rdev, &scratch);
2295         if (r) {
2296                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2297                 return r;
2298         }
2299         WREG32(scratch, 0xCAFEDEAD);
2300         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2301         if (r) {
2302                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2303                 return r;
2304         }
2305         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2306         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2307         ib.ptr[2] = 0xDEADBEEF;
2308         ib.length_dw = 3;
2309         r = radeon_ib_schedule(rdev, &ib, NULL);
2310         if (r) {
2311                 radeon_scratch_free(rdev, scratch);
2312                 radeon_ib_free(rdev, &ib);
2313                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2314                 return r;
2315         }
2316         r = radeon_fence_wait(ib.fence, false);
2317         if (r) {
2318                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                radeon_scratch_free(rdev, scratch);
                radeon_ib_free(rdev, &ib);
2319                 return r;
2320         }
2321         for (i = 0; i < rdev->usec_timeout; i++) {
2322                 tmp = RREG32(scratch);
2323                 if (tmp == 0xDEADBEEF)
2324                         break;
2325                 DRM_UDELAY(1);
2326         }
2327         if (i < rdev->usec_timeout) {
2328                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2329         } else {
2330                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2331                           scratch, tmp);
2332                 r = -EINVAL;
2333         }
2334         radeon_scratch_free(rdev, scratch);
2335         radeon_ib_free(rdev, &ib);
2336         return r;
2337 }
2338
2339 /*
2340  * CP.
2341  * On CIK, gfx and compute now have independent command processors.
2342  *
2343  * GFX
2344  * Gfx consists of a single ring and can process both gfx jobs and
2345  * compute jobs.  The gfx CP consists of three microengines (ME):
2346  * PFP - Pre-Fetch Parser
2347  * ME - Micro Engine
2348  * CE - Constant Engine
2349  * The PFP and ME make up what is considered the Drawing Engine (DE).
2350  * The CE is an asynchronous engine used for updating buffer descriptors
2351  * used by the DE so that they can be loaded into cache in parallel
2352  * while the DE is processing state update packets.
2353  *
2354  * Compute
2355  * The compute CP consists of two microengines (ME):
2356  * MEC1 - Compute MicroEngine 1
2357  * MEC2 - Compute MicroEngine 2
2358  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2359  * The queues are exposed to userspace and are programmed directly
2360  * by the compute runtime.
2361  */
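
/*
 * Illustrative sketch (not part of the driver): how a flat compute pipe
 * index maps onto an (ME, pipe) pair, mirroring the loop in
 * cik_cp_compute_resume() below.  MEC1 backs ME 1 (pipes 0-3) and, on
 * Kaveri, MEC2 backs ME 2 (pipes 4-7); each pipe then carries 8 queues.
 */
static __unused void cik_compute_pipe_decode_example(int i, int *me, int *pipe)
{
        *me = (i < 4) ? 1 : 2;
        *pipe = (i < 4) ? i : (i - 4);
}
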
2362 /**
2363  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2364  *
2365  * @rdev: radeon_device pointer
2366  * @enable: enable or disable the MEs
2367  *
2368  * Halts or unhalts the gfx MEs.
2369  */
2370 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2371 {
2372         if (enable)
2373                 WREG32(CP_ME_CNTL, 0);
2374         else {
2375                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2376                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2377         }
2378         udelay(50);
2379 }
2380
2381 /**
2382  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2383  *
2384  * @rdev: radeon_device pointer
2385  *
2386  * Loads the gfx PFP, ME, and CE ucode.
2387  * Returns 0 for success, -EINVAL if the ucode is not available.
2388  */
2389 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2390 {
2391         const __be32 *fw_data;
2392         int i;
2393
2394         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2395                 return -EINVAL;
2396
2397         cik_cp_gfx_enable(rdev, false);
2398
2399         /* PFP */
2400         fw_data = (const __be32 *)rdev->pfp_fw->data;
2401         WREG32(CP_PFP_UCODE_ADDR, 0);
2402         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2403                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2404         WREG32(CP_PFP_UCODE_ADDR, 0);
2405
2406         /* CE */
2407         fw_data = (const __be32 *)rdev->ce_fw->data;
2408         WREG32(CP_CE_UCODE_ADDR, 0);
2409         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2410                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2411         WREG32(CP_CE_UCODE_ADDR, 0);
2412
2413         /* ME */
2414         fw_data = (const __be32 *)rdev->me_fw->data;
2415         WREG32(CP_ME_RAM_WADDR, 0);
2416         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2417                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2418         WREG32(CP_ME_RAM_WADDR, 0);
2419
2420         WREG32(CP_PFP_UCODE_ADDR, 0);
2421         WREG32(CP_CE_UCODE_ADDR, 0);
2422         WREG32(CP_ME_RAM_WADDR, 0);
2423         WREG32(CP_ME_RAM_RADDR, 0);
2424         return 0;
2425 }
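
/*
 * Illustrative sketch (not part of the driver): the PFP/CE/ME loads above
 * all follow the same pattern - zero the address register, stream the
 * big-endian ucode words through the data register, then zero the address
 * register again so the engine fetches from the start.  A hypothetical
 * helper factoring that pattern would look like this:
 */
static __unused void cik_load_ucode_example(struct radeon_device *rdev,
                                            u32 addr_reg, u32 data_reg,
                                            const __be32 *fw_data, u32 size)
{
        u32 i;

        WREG32(addr_reg, 0);
        for (i = 0; i < size; i++)
                WREG32(data_reg, be32_to_cpup(fw_data++));
        WREG32(addr_reg, 0);
}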
2426
2427 /**
2428  * cik_cp_gfx_start - start the gfx ring
2429  *
2430  * @rdev: radeon_device pointer
2431  *
2432  * Enables the ring and loads the clear state context and other
2433  * packets required to init the ring.
2434  * Returns 0 for success, error for failure.
2435  */
2436 static int cik_cp_gfx_start(struct radeon_device *rdev)
2437 {
2438         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2439         int r, i;
2440
2441         /* init the CP */
2442         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2443         WREG32(CP_ENDIAN_SWAP, 0);
2444         WREG32(CP_DEVICE_ID, 1);
2445
2446         cik_cp_gfx_enable(rdev, true);
2447
2448         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2449         if (r) {
2450                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2451                 return r;
2452         }
2453
2454         /* init the CE partitions.  CE only used for gfx on CIK */
2455         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2456         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2457         radeon_ring_write(ring, 0xc000);
2458         radeon_ring_write(ring, 0xc000);
2459
2460         /* setup clear context state */
2461         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2462         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2463
2464         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2465         radeon_ring_write(ring, 0x80000000);
2466         radeon_ring_write(ring, 0x80000000);
2467
2468         for (i = 0; i < cik_default_size; i++)
2469                 radeon_ring_write(ring, cik_default_state[i]);
2470
2471         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2472         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2473
2474         /* set clear context state */
2475         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2476         radeon_ring_write(ring, 0);
2477
2478         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2479         radeon_ring_write(ring, 0x00000316);
2480         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2481         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2482
2483         radeon_ring_unlock_commit(rdev, ring);
2484
2485         return 0;
2486 }
2487
2488 /**
2489  * cik_cp_gfx_fini - stop the gfx ring
2490  *
2491  * @rdev: radeon_device pointer
2492  *
2493  * Stop the gfx ring and tear down the driver ring
2494  * info.
2495  */
2496 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2497 {
2498         cik_cp_gfx_enable(rdev, false);
2499         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2500 }
2501
2502 /**
2503  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2504  *
2505  * @rdev: radeon_device pointer
2506  *
2507  * Program the location and size of the gfx ring buffer
2508  * and test it to make sure it's working.
2509  * Returns 0 for success, error for failure.
2510  */
2511 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2512 {
2513         struct radeon_ring *ring;
2514         u32 tmp;
2515         u32 rb_bufsz;
2516         u64 rb_addr;
2517         int r;
2518
2519         WREG32(CP_SEM_WAIT_TIMER, 0x0);
2520         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2521
2522         /* Set the write pointer delay */
2523         WREG32(CP_RB_WPTR_DELAY, 0);
2524
2525         /* set the RB to use vmid 0 */
2526         WREG32(CP_RB_VMID, 0);
2527
2528         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2529
2530         /* ring 0 - compute and gfx */
2531         /* Set ring buffer size */
2532         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2533         rb_bufsz = drm_order(ring->ring_size / 8);
2534         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
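        /*
         * Worked example (illustrative): for a 1 MB ring, ring_size / 8 =
         * 0x20000, so rb_bufsz = drm_order(0x20000) = 17; the rptr
         * writeback block size is drm_order(4096 / 8) = 9 in bits 8+,
         * assuming the usual 4 KB RADEON_GPU_PAGE_SIZE.
         */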
2535 #ifdef __BIG_ENDIAN
2536         tmp |= BUF_SWAP_32BIT;
2537 #endif
2538         WREG32(CP_RB0_CNTL, tmp);
2539
2540         /* Initialize the ring buffer's read and write pointers */
2541         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2542         ring->wptr = 0;
2543         WREG32(CP_RB0_WPTR, ring->wptr);
2544
2545         /* set the wb address whether it's enabled or not */
2546         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2547         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2548
2549         /* scratch register shadowing is no longer supported */
2550         WREG32(SCRATCH_UMSK, 0);
2551
2552         if (!rdev->wb.enabled)
2553                 tmp |= RB_NO_UPDATE;
2554
2555         mdelay(1);
2556         WREG32(CP_RB0_CNTL, tmp);
2557
2558         rb_addr = ring->gpu_addr >> 8;
2559         WREG32(CP_RB0_BASE, rb_addr);
2560         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2561
2562         ring->rptr = RREG32(CP_RB0_RPTR);
2563
2564         /* start the ring */
2565         cik_cp_gfx_start(rdev);
2566         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2567         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2568         if (r) {
2569                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2570                 return r;
2571         }
2572         return 0;
2573 }
2574
2575 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2576                               struct radeon_ring *ring)
2577 {
2578         u32 rptr;
2579
2582         if (rdev->wb.enabled) {
2583                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2584         } else {
2585                 spin_lock(&rdev->srbm_mutex);
2586                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2587                 rptr = RREG32(CP_HQD_PQ_RPTR);
2588                 cik_srbm_select(rdev, 0, 0, 0, 0);
2589                 spin_unlock(&rdev->srbm_mutex);
2590         }
2591         rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2592
2593         return rptr;
2594 }
2595
2596 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2597                               struct radeon_ring *ring)
2598 {
2599         u32 wptr;
2600
2601         if (rdev->wb.enabled) {
2602                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2603         } else {
2604                 spin_lock(&rdev->srbm_mutex);
2605                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2606                 wptr = RREG32(CP_HQD_PQ_WPTR);
2607                 cik_srbm_select(rdev, 0, 0, 0, 0);
2608                 spin_unlock(&rdev->srbm_mutex);
2609         }
2610         wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2611
2612         return wptr;
2613 }
2614
2615 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2616                                struct radeon_ring *ring)
2617 {
2618         u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2619
2620         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2621         WDOORBELL32(ring->doorbell_offset, wptr);
2622 }
2623
2624 /**
2625  * cik_cp_compute_enable - enable/disable the compute CP MEs
2626  *
2627  * @rdev: radeon_device pointer
2628  * @enable: enable or disable the MEs
2629  *
2630  * Halts or unhalts the compute MEs.
2631  */
2632 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2633 {
2634         if (enable)
2635                 WREG32(CP_MEC_CNTL, 0);
2636         else
2637                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2638         udelay(50);
2639 }
2640
2641 /**
2642  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2643  *
2644  * @rdev: radeon_device pointer
2645  *
2646  * Loads the compute MEC1 (and, on Kaveri, MEC2) ucode.
2647  * Returns 0 for success, -EINVAL if the ucode is not available.
2648  */
2649 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2650 {
2651         const __be32 *fw_data;
2652         int i;
2653
2654         if (!rdev->mec_fw)
2655                 return -EINVAL;
2656
2657         cik_cp_compute_enable(rdev, false);
2658
2659         /* MEC1 */
2660         fw_data = (const __be32 *)rdev->mec_fw->data;
2661         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2662         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2663                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2664         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2665
2666         if (rdev->family == CHIP_KAVERI) {
2667                 /* MEC2 */
2668                 fw_data = (const __be32 *)rdev->mec_fw->data;
2669                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2670                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2671                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2672                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2673         }
2674
2675         return 0;
2676 }
2677
2678 /**
2679  * cik_cp_compute_start - start the compute queues
2680  *
2681  * @rdev: radeon_device pointer
2682  *
2683  * Enable the compute queues.
2684  * Returns 0 for success, error for failure.
2685  */
2686 static int cik_cp_compute_start(struct radeon_device *rdev)
2687 {
2688         cik_cp_compute_enable(rdev, true);
2689
2690         return 0;
2691 }
2692
2693 /**
2694  * cik_cp_compute_fini - stop the compute queues
2695  *
2696  * @rdev: radeon_device pointer
2697  *
2698  * Stop the compute queues and tear down the driver queue
2699  * info.
2700  */
2701 static void cik_cp_compute_fini(struct radeon_device *rdev)
2702 {
2703         int i, idx, r;
2704
2705         cik_cp_compute_enable(rdev, false);
2706
2707         for (i = 0; i < 2; i++) {
2708                 if (i == 0)
2709                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2710                 else
2711                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2712
2713                 if (rdev->ring[idx].mqd_obj) {
2714                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2715                         if (unlikely(r != 0))
2716                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2717
2718                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2719                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2720
2721                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2722                         rdev->ring[idx].mqd_obj = NULL;
2723                 }
2724         }
2725 }
2726
2727 static void cik_mec_fini(struct radeon_device *rdev)
2728 {
2729         int r;
2730
2731         if (rdev->mec.hpd_eop_obj) {
2732                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2733                 if (unlikely(r != 0))
2734                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2735                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2736                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2737
2738                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2739                 rdev->mec.hpd_eop_obj = NULL;
2740         }
2741 }
2742
2743 #define MEC_HPD_SIZE 2048
2744
2745 static int cik_mec_init(struct radeon_device *rdev)
2746 {
2747         int r;
2748         u32 *hpd;
2749
2750         /*
2751          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2752          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2753          */
2754         if (rdev->family == CHIP_KAVERI)
2755                 rdev->mec.num_mec = 2;
2756         else
2757                 rdev->mec.num_mec = 1;
2758         rdev->mec.num_pipe = 4;
2759         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2760
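        /*
         * Worked example (illustrative): on Kaveri the HPD EOP buffer
         * allocated below is 2 MEC * 4 pipes * 2048 bytes * 2 = 32 KB;
         * on Bonaire/Kabini (1 MEC) it is 16 KB.
         */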
2761         if (rdev->mec.hpd_eop_obj == NULL) {
2762                 r = radeon_bo_create(rdev,
2763                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2764                                      PAGE_SIZE, true,
2765                                      RADEON_GEM_DOMAIN_GTT, NULL,
2766                                      &rdev->mec.hpd_eop_obj);
2767                 if (r) {
2768                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2769                         return r;
2770                 }
2771         }
2772
2773         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2774         if (unlikely(r != 0)) {
2775                 cik_mec_fini(rdev);
2776                 return r;
2777         }
2778         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2779                           &rdev->mec.hpd_eop_gpu_addr);
2780         if (r) {
2781                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2782                 cik_mec_fini(rdev);
2783                 return r;
2784         }
2785         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2786         if (r) {
2787                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2788                 cik_mec_fini(rdev);
2789                 return r;
2790         }
2791
2792         /* clear memory.  Not sure if this is required or not */
2793         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2794
2795         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2796         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2797
2798         return 0;
2799 }
2800
2801 struct hqd_registers
2802 {
2803         u32 cp_mqd_base_addr;
2804         u32 cp_mqd_base_addr_hi;
2805         u32 cp_hqd_active;
2806         u32 cp_hqd_vmid;
2807         u32 cp_hqd_persistent_state;
2808         u32 cp_hqd_pipe_priority;
2809         u32 cp_hqd_queue_priority;
2810         u32 cp_hqd_quantum;
2811         u32 cp_hqd_pq_base;
2812         u32 cp_hqd_pq_base_hi;
2813         u32 cp_hqd_pq_rptr;
2814         u32 cp_hqd_pq_rptr_report_addr;
2815         u32 cp_hqd_pq_rptr_report_addr_hi;
2816         u32 cp_hqd_pq_wptr_poll_addr;
2817         u32 cp_hqd_pq_wptr_poll_addr_hi;
2818         u32 cp_hqd_pq_doorbell_control;
2819         u32 cp_hqd_pq_wptr;
2820         u32 cp_hqd_pq_control;
2821         u32 cp_hqd_ib_base_addr;
2822         u32 cp_hqd_ib_base_addr_hi;
2823         u32 cp_hqd_ib_rptr;
2824         u32 cp_hqd_ib_control;
2825         u32 cp_hqd_iq_timer;
2826         u32 cp_hqd_iq_rptr;
2827         u32 cp_hqd_dequeue_request;
2828         u32 cp_hqd_dma_offload;
2829         u32 cp_hqd_sema_cmd;
2830         u32 cp_hqd_msg_type;
2831         u32 cp_hqd_atomic0_preop_lo;
2832         u32 cp_hqd_atomic0_preop_hi;
2833         u32 cp_hqd_atomic1_preop_lo;
2834         u32 cp_hqd_atomic1_preop_hi;
2835         u32 cp_hqd_hq_scheduler0;
2836         u32 cp_hqd_hq_scheduler1;
2837         u32 cp_mqd_control;
2838 };
2839
2840 struct bonaire_mqd
2841 {
2842         u32 header;
2843         u32 dispatch_initiator;
2844         u32 dimensions[3];
2845         u32 start_idx[3];
2846         u32 num_threads[3];
2847         u32 pipeline_stat_enable;
2848         u32 perf_counter_enable;
2849         u32 pgm[2];
2850         u32 tba[2];
2851         u32 tma[2];
2852         u32 pgm_rsrc[2];
2853         u32 vmid;
2854         u32 resource_limits;
2855         u32 static_thread_mgmt01[2];
2856         u32 tmp_ring_size;
2857         u32 static_thread_mgmt23[2];
2858         u32 restart[3];
2859         u32 thread_trace_enable;
2860         u32 reserved1;
2861         u32 user_data[16];
2862         u32 vgtcs_invoke_count[2];
2863         struct hqd_registers queue_state;
2864         u32 dequeue_cntr;
2865         u32 interrupt_queue[64];
2866 };
2867
2868 /**
2869  * cik_cp_compute_resume - setup the compute queue registers
2870  *
2871  * @rdev: radeon_device pointer
2872  *
2873  * Program the compute queues and test them to make sure they
2874  * are working.
2875  * Returns 0 for success, error for failure.
2876  */
2877 static int cik_cp_compute_resume(struct radeon_device *rdev)
2878 {
2879         int r, i, j, idx;
2880         u32 tmp;
2881         bool use_doorbell = true;
2882         u64 hqd_gpu_addr;
2883         u64 mqd_gpu_addr;
2884         u64 eop_gpu_addr;
2885         u64 wb_gpu_addr;
2886         u32 *buf;
2887         struct bonaire_mqd *mqd;
2888
2889         r = cik_cp_compute_start(rdev);
2890         if (r)
2891                 return r;
2892
2893         /* fix up chicken bits */
2894         tmp = RREG32(CP_CPF_DEBUG);
2895         tmp |= (1 << 23);
2896         WREG32(CP_CPF_DEBUG, tmp);
2897
2898         /* init the pipes */
2899         spin_lock(&rdev->srbm_mutex);
2900         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901                 int me = (i < 4) ? 1 : 2;
2902                 int pipe = (i < 4) ? i : (i - 4);
2903
2904                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905
2906                 cik_srbm_select(rdev, me, pipe, 0, 0);
2907
2908                 /* write the EOP addr */
2909                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911
2912                 /* set the VMID assigned */
2913                 WREG32(CP_HPD_EOP_VMID, 0);
2914
2915                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916                 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917                 tmp &= ~EOP_SIZE_MASK;
2918                 tmp |= drm_order(MEC_HPD_SIZE / 8);
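                /*
                 * Worked example (illustrative): MEC_HPD_SIZE is 2048
                 * bytes, so drm_order(2048 / 8) = drm_order(256) = 8 and
                 * the EOP ring programmed below is 2^(8+1) = 512 dwords =
                 * 2048 bytes, matching MEC_HPD_SIZE.
                 */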
2919                 WREG32(CP_HPD_EOP_CONTROL, tmp);
2920         }
2921         cik_srbm_select(rdev, 0, 0, 0, 0);
2922         spin_unlock(&rdev->srbm_mutex);
2923
2924         /* init the queues.  Just two for now. */
2925         for (i = 0; i < 2; i++) {
2926                 if (i == 0)
2927                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2928                 else
2929                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2930
2931                 if (rdev->ring[idx].mqd_obj == NULL) {
2932                         r = radeon_bo_create(rdev,
2933                                              sizeof(struct bonaire_mqd),
2934                                              PAGE_SIZE, true,
2935                                              RADEON_GEM_DOMAIN_GTT, NULL,
2936                                              &rdev->ring[idx].mqd_obj);
2937                         if (r) {
2938                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2939                                 return r;
2940                         }
2941                 }
2942
2943                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2944                 if (unlikely(r != 0)) {
2945                         cik_cp_compute_fini(rdev);
2946                         return r;
2947                 }
2948                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2949                                   &mqd_gpu_addr);
2950                 if (r) {
2951                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2952                         cik_cp_compute_fini(rdev);
2953                         return r;
2954                 }
2955                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2956                 if (r) {
2957                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2958                         cik_cp_compute_fini(rdev);
2959                         return r;
2960                 }
2961
2962                 /* doorbell offset */
2963                 rdev->ring[idx].doorbell_offset =
2964                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2965
2966                 /* init the mqd struct */
2967                 memset(buf, 0, sizeof(struct bonaire_mqd));
2968
2969                 mqd = (struct bonaire_mqd *)buf;
2970                 mqd->header = 0xC0310800;
2971                 mqd->static_thread_mgmt01[0] = 0xffffffff;
2972                 mqd->static_thread_mgmt01[1] = 0xffffffff;
2973                 mqd->static_thread_mgmt23[0] = 0xffffffff;
2974                 mqd->static_thread_mgmt23[1] = 0xffffffff;
2975
2976                 spin_lock(&rdev->srbm_mutex);
2977                 cik_srbm_select(rdev, rdev->ring[idx].me,
2978                                 rdev->ring[idx].pipe,
2979                                 rdev->ring[idx].queue, 0);
2980
2981                 /* disable wptr polling */
2982                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2983                 tmp &= ~WPTR_POLL_EN;
2984                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2985
2986                 /* enable doorbell? */
2987                 mqd->queue_state.cp_hqd_pq_doorbell_control =
2988                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2989                 if (use_doorbell)
2990                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2991                 else
2992                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2993                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2994                        mqd->queue_state.cp_hqd_pq_doorbell_control);
2995
2996                 /* disable the queue if it's active */
2997                 mqd->queue_state.cp_hqd_dequeue_request = 0;
2998                 mqd->queue_state.cp_hqd_pq_rptr = 0;
2999                 mqd->queue_state.cp_hqd_pq_wptr = 0;
3000                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3001                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3002                         for (j = 0; j < rdev->usec_timeout; j++) {
3003                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3004                                         break;
3005                                 udelay(1);
3006                         }
3007                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3008                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3009                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3010                 }
3011
3012                 /* set the pointer to the MQD */
3013                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3014                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3015                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3016                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3017                 /* set MQD vmid to 0 */
3018                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3019                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3020                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3021
3022                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3023                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3024                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3025                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3026                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3027                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3028
3029                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3030                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3031                 mqd->queue_state.cp_hqd_pq_control &=
3032                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3033
3034                 mqd->queue_state.cp_hqd_pq_control |=
3035                         drm_order(rdev->ring[idx].ring_size / 8);
3036                 mqd->queue_state.cp_hqd_pq_control |=
3037                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3038 #ifdef __BIG_ENDIAN
3039                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3040 #endif
3041                 mqd->queue_state.cp_hqd_pq_control &=
3042                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3043                 mqd->queue_state.cp_hqd_pq_control |=
3044                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3045                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3046
3047                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3048                 if (i == 0)
3049                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3050                 else
3051                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3052                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3053                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3054                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3055                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3056                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3057
3058                 /* set the wb address whether it's enabled or not */
3059                 if (i == 0)
3060                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3061                 else
3062                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3063                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3064                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3065                         upper_32_bits(wb_gpu_addr) & 0xffff;
3066                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3067                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3068                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3069                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3070
3071                 /* enable the doorbell if requested */
3072                 if (use_doorbell) {
3073                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3074                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3075                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3076                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3077                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3078                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3079                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3080                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3081
3082                 } else {
3083                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3084                 }
3085                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3086                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3087
3088                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3089                 rdev->ring[idx].wptr = 0;
3090                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3091                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3092                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3093                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3094
3095                 /* set the vmid for the queue */
3096                 mqd->queue_state.cp_hqd_vmid = 0;
3097                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3098
3099                 /* activate the queue */
3100                 mqd->queue_state.cp_hqd_active = 1;
3101                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3102
3103                 cik_srbm_select(rdev, 0, 0, 0, 0);
3104                 spin_unlock(&rdev->srbm_mutex);
3105
3106                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3107                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3108
3109                 rdev->ring[idx].ready = true;
3110                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3111                 if (r)
3112                         rdev->ring[idx].ready = false;
3113         }
3114
3115         return 0;
3116 }
3117
3118 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3119 {
3120         cik_cp_gfx_enable(rdev, enable);
3121         cik_cp_compute_enable(rdev, enable);
3122 }
3123
3124 static int cik_cp_load_microcode(struct radeon_device *rdev)
3125 {
3126         int r;
3127
3128         r = cik_cp_gfx_load_microcode(rdev);
3129         if (r)
3130                 return r;
3131         r = cik_cp_compute_load_microcode(rdev);
3132         if (r)
3133                 return r;
3134
3135         return 0;
3136 }
3137
3138 static void cik_cp_fini(struct radeon_device *rdev)
3139 {
3140         cik_cp_gfx_fini(rdev);
3141         cik_cp_compute_fini(rdev);
3142 }
3143
3144 static int cik_cp_resume(struct radeon_device *rdev)
3145 {
3146         int r;
3147
3148         /* Reset all cp blocks */
3149         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3150         RREG32(GRBM_SOFT_RESET);
3151         mdelay(15);
3152         WREG32(GRBM_SOFT_RESET, 0);
3153         RREG32(GRBM_SOFT_RESET);
3154
3155         r = cik_cp_load_microcode(rdev);
3156         if (r)
3157                 return r;
3158
3159         r = cik_cp_gfx_resume(rdev);
3160         if (r)
3161                 return r;
3162         r = cik_cp_compute_resume(rdev);
3163         if (r)
3164                 return r;
3165
3166         return 0;
3167 }
3168
3169 /*
3170  * sDMA - System DMA
3171  * Starting with CIK, the GPU has new asynchronous
3172  * DMA engines.  These engines are used for compute
3173  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3174  * and each one supports 1 ring buffer used for gfx
3175  * and 2 queues used for compute.
3176  *
3177  * The programming model is very similar to the CP
3178  * (ring buffer, IBs, etc.), but sDMA has its own
3179  * packet format that is different from the PM4 format
3180  * used by the CP. sDMA supports copying data, writing
3181  * embedded data, solid fills, and a number of other
3182  * things.  It also has support for tiling/detiling of
3183  * buffers.
3184  */
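
/*
 * Illustrative sketch (not part of the driver): a minimal sDMA linear
 * write packet, built the same way cik_sdma_ring_ib_execute() below writes
 * next_rptr - an opcode/sub-opcode header, a 64-bit destination address, a
 * dword count, then the payload.  The caller is assumed to hold the ring
 * lock and to commit the ring afterwards.
 */
static __unused void cik_sdma_write_dword_example(struct radeon_ring *ring,
                                                  u64 dst_addr, u32 value)
{
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
                                            SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
        radeon_ring_write(ring, dst_addr & 0xfffffffc); /* dword aligned */
        radeon_ring_write(ring, upper_32_bits(dst_addr));
        radeon_ring_write(ring, 1); /* number of DWs to follow */
        radeon_ring_write(ring, value);
}
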
3185 /**
3186  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3187  *
3188  * @rdev: radeon_device pointer
3189  * @ib: IB object to schedule
3190  *
3191  * Schedule an IB in the DMA ring (CIK).
3192  */
3193 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3194                               struct radeon_ib *ib)
3195 {
3196         struct radeon_ring *ring = &rdev->ring[ib->ring];
3197         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3198
3199         if (rdev->wb.enabled) {
3200                 u32 next_rptr = ring->wptr + 5;
3201                 while ((next_rptr & 7) != 4)
3202                         next_rptr++;
3203                 next_rptr += 4;
3204                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3205                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3206                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3207                 radeon_ring_write(ring, 1); /* number of DWs to follow */
3208                 radeon_ring_write(ring, next_rptr);
3209         }
3210
3211         /* IB packet must end on an 8 DW boundary; pad with NOPs so the 4 DW INDIRECT_BUFFER packet below ends on one */
3212         while ((ring->wptr & 7) != 4)
3213                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3214         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3215         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3216         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3217         radeon_ring_write(ring, ib->length_dw);
3219 }
3220
3221 /**
3222  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3223  *
3224  * @rdev: radeon_device pointer
3225  * @fence: radeon fence object
3226  *
3227  * Add a DMA fence packet to the ring to write
3228  * the fence seq number and a DMA trap packet to generate
3229  * an interrupt if needed (CIK).
3230  */
3231 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3232                               struct radeon_fence *fence)
3233 {
3234         struct radeon_ring *ring = &rdev->ring[fence->ring];
3235         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3236         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3237                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3238         u32 ref_and_mask;
3239
3240         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3241                 ref_and_mask = SDMA0;
3242         else
3243                 ref_and_mask = SDMA1;
3244
3245         /* write the fence */
3246         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3247         radeon_ring_write(ring, addr & 0xffffffff);
3248         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3249         radeon_ring_write(ring, fence->seq);
3250         /* generate an interrupt */
3251         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3252         /* flush HDP */
3253         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3254         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3255         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3256         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3257         radeon_ring_write(ring, ref_and_mask); /* MASK */
3258         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3259 }
3260
3261 /**
3262  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3263  *
3264  * @rdev: radeon_device pointer
3265  * @ring: radeon_ring structure holding ring information
3266  * @semaphore: radeon semaphore object
3267  * @emit_wait: wait or signal semaphore
3268  *
3269  * Add a DMA semaphore packet to the ring to wait on or signal
3270  * other rings (CIK).
3271  */
3272 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3273                                   struct radeon_ring *ring,
3274                                   struct radeon_semaphore *semaphore,
3275                                   bool emit_wait)
3276 {
3277         u64 addr = semaphore->gpu_addr;
3278         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3279
3280         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3281         radeon_ring_write(ring, addr & 0xfffffff8);
3282         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3283 }
3284
3285 /**
3286  * cik_sdma_gfx_stop - stop the gfx async dma engines
3287  *
3288  * @rdev: radeon_device pointer
3289  *
3290  * Stop the gfx async dma ring buffers (CIK).
3291  */
3292 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3293 {
3294         u32 rb_cntl, reg_offset;
3295         int i;
3296
3297         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3298
3299         for (i = 0; i < 2; i++) {
3300                 if (i == 0)
3301                         reg_offset = SDMA0_REGISTER_OFFSET;
3302                 else
3303                         reg_offset = SDMA1_REGISTER_OFFSET;
3304                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3305                 rb_cntl &= ~SDMA_RB_ENABLE;
3306                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3307                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3308         }
3309 }
3310
3311 /**
3312  * cik_sdma_rlc_stop - stop the compute async dma engines
3313  *
3314  * @rdev: radeon_device pointer
3315  *
3316  * Stop the compute async dma queues (CIK).
3317  */
3318 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3319 {
3320         /* XXX todo */
3321 }
3322
3323 /**
3324  * cik_sdma_enable - enable/disable the async dma engines
3325  *
3326  * @rdev: radeon_device pointer
3327  * @enable: enable/disable the DMA MEs.
3328  *
3329  * Halt or unhalt the async dma engines (CIK).
3330  */
3331 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3332 {
3333         u32 me_cntl, reg_offset;
3334         int i;
3335
3336         for (i = 0; i < 2; i++) {
3337                 if (i == 0)
3338                         reg_offset = SDMA0_REGISTER_OFFSET;
3339                 else
3340                         reg_offset = SDMA1_REGISTER_OFFSET;
3341                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3342                 if (enable)
3343                         me_cntl &= ~SDMA_HALT;
3344                 else
3345                         me_cntl |= SDMA_HALT;
3346                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3347         }
3348 }
3349
3350 /**
3351  * cik_sdma_gfx_resume - setup and start the async dma engines
3352  *
3353  * @rdev: radeon_device pointer
3354  *
3355  * Set up the gfx DMA ring buffers and enable them (CIK).
3356  * Returns 0 for success, error for failure.
3357  */
3358 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3359 {
3360         struct radeon_ring *ring;
3361         u32 rb_cntl, ib_cntl;
3362         u32 rb_bufsz;
3363         u32 reg_offset, wb_offset;
3364         int i, r;
3365
3366         for (i = 0; i < 2; i++) {
3367                 if (i == 0) {
3368                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3369                         reg_offset = SDMA0_REGISTER_OFFSET;
3370                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
3371                 } else {
3372                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3373                         reg_offset = SDMA1_REGISTER_OFFSET;
3374                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3375                 }
3376
3377                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3378                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3379
3380                 /* Set ring buffer size in dwords */
3381                 rb_bufsz = drm_order(ring->ring_size / 4);
3382                 rb_cntl = rb_bufsz << 1;
3383 #ifdef __BIG_ENDIAN
3384                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3385 #endif
3386                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3387
3388                 /* Initialize the ring buffer's read and write pointers */
3389                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3390                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3391
3392                 /* set the wb address whether it's enabled or not */
3393                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3394                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3395                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3396                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3397
3398                 if (rdev->wb.enabled)
3399                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3400
3401                 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3402                 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3403
3404                 ring->wptr = 0;
3405                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3406
3407                 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3408
3409                 /* enable DMA RB */
3410                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3411
3412                 ib_cntl = SDMA_IB_ENABLE;
3413 #ifdef __BIG_ENDIAN
3414                 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3415 #endif
3416                 /* enable DMA IBs */
3417                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3418
3419                 ring->ready = true;
3420
3421                 r = radeon_ring_test(rdev, ring->idx, ring);
3422                 if (r) {
3423                         ring->ready = false;
3424                         return r;
3425                 }
3426         }
3427
3428         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3429
3430         return 0;
3431 }
3432
3433 /**
3434  * cik_sdma_rlc_resume - setup and start the async dma engines
3435  *
3436  * @rdev: radeon_device pointer
3437  *
3438  * Set up the compute DMA queues and enable them (CIK).
3439  * Returns 0 for success, error for failure.
3440  */
3441 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3442 {
3443         /* XXX todo */
3444         return 0;
3445 }
3446
3447 /**
3448  * cik_sdma_load_microcode - load the sDMA ME ucode
3449  *
3450  * @rdev: radeon_device pointer
3451  *
3452  * Loads the sDMA0/1 ucode.
3453  * Returns 0 for success, -EINVAL if the ucode is not available.
3454  */
3455 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3456 {
3457         const __be32 *fw_data;
3458         int i;
3459
3460         if (!rdev->sdma_fw)
3461                 return -EINVAL;
3462
3463         /* stop the gfx rings and rlc compute queues */
3464         cik_sdma_gfx_stop(rdev);
3465         cik_sdma_rlc_stop(rdev);
3466
3467         /* halt the MEs */
3468         cik_sdma_enable(rdev, false);
3469
3470         /* sdma0 */
3471         fw_data = (const __be32 *)rdev->sdma_fw->data;
3472         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3473         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3474                 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3475         WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3476
3477         /* sdma1 */
3478         fw_data = (const __be32 *)rdev->sdma_fw->data;
3479         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3480         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3481                 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3482         WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3483
3484         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3485         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3486         return 0;
3487 }
3488
3489 /**
3490  * cik_sdma_resume - setup and start the async dma engines
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Set up the DMA engines and enable them (CIK).
3495  * Returns 0 for success, error for failure.
3496  */
3497 static __unused int cik_sdma_resume(struct radeon_device *rdev)
3498 {
3499         int r;
3500
3501         /* Reset dma */
3502         WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3503         RREG32(SRBM_SOFT_RESET);
3504         udelay(50);
3505         WREG32(SRBM_SOFT_RESET, 0);
3506         RREG32(SRBM_SOFT_RESET);
3507
3508         r = cik_sdma_load_microcode(rdev);
3509         if (r)
3510                 return r;
3511
3512         /* unhalt the MEs */
3513         cik_sdma_enable(rdev, true);
3514
3515         /* start the gfx rings and rlc compute queues */
3516         r = cik_sdma_gfx_resume(rdev);
3517         if (r)
3518                 return r;
3519         r = cik_sdma_rlc_resume(rdev);
3520         if (r)
3521                 return r;
3522
3523         return 0;
3524 }
3525
3526 /**
3527  * cik_sdma_fini - tear down the async dma engines
3528  *
3529  * @rdev: radeon_device pointer
3530  *
3531  * Stop the async dma engines and free the rings (CIK).
3532  */
3533 static __unused void cik_sdma_fini(struct radeon_device *rdev)
3534 {
3535         /* stop the gfx rings and rlc compute queues */
3536         cik_sdma_gfx_stop(rdev);
3537         cik_sdma_rlc_stop(rdev);
3538         /* halt the MEs */
3539         cik_sdma_enable(rdev, false);
3540         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3541         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3542         /* XXX - compute dma queue tear down */
3543 }
3544
3545 /**
3546  * cik_copy_dma - copy pages using the DMA engine
3547  *
3548  * @rdev: radeon_device pointer
3549  * @src_offset: src GPU address
3550  * @dst_offset: dst GPU address
3551  * @num_gpu_pages: number of GPU pages to xfer
3552  * @fence: radeon fence object
3553  *
3554  * Copy GPU pages using the DMA engine (CIK).
3555  * Used by the radeon ttm implementation to move pages if
3556  * registered as the asic copy callback.
3557  */
3558 int cik_copy_dma(struct radeon_device *rdev,
3559                  uint64_t src_offset, uint64_t dst_offset,
3560                  unsigned num_gpu_pages,
3561                  struct radeon_fence **fence)
3562 {
3563         struct radeon_semaphore *sem = NULL;
3564         int ring_index = rdev->asic->copy.dma_ring_index;
3565         struct radeon_ring *ring = &rdev->ring[ring_index];
3566         u32 size_in_bytes, cur_size_in_bytes;
3567         int i, num_loops;
3568         int r = 0;
3569
3570         r = radeon_semaphore_create(rdev, &sem);
3571         if (r) {
3572                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3573                 return r;
3574         }
3575
3576         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3577         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3578         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3579         if (r) {
3580                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581                 radeon_semaphore_free(rdev, &sem, NULL);
3582                 return r;
3583         }
3584
3585         if (radeon_fence_need_sync(*fence, ring->idx)) {
3586                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3587                                             ring->idx);
3588                 radeon_fence_note_sync(*fence, ring->idx);
3589         } else {
3590                 radeon_semaphore_free(rdev, &sem, NULL);
3591         }
3592
3593         for (i = 0; i < num_loops; i++) {
3594                 cur_size_in_bytes = size_in_bytes;
3595                 if (cur_size_in_bytes > 0x1fffff)
3596                         cur_size_in_bytes = 0x1fffff;
3597                 size_in_bytes -= cur_size_in_bytes;
3598                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3599                 radeon_ring_write(ring, cur_size_in_bytes);
3600                 radeon_ring_write(ring, 0); /* src/dst endian swap */
3601                 radeon_ring_write(ring, src_offset & 0xffffffff);
3602                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3603                 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3604                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3605                 src_offset += cur_size_in_bytes;
3606                 dst_offset += cur_size_in_bytes;
3607         }
3608
3609         r = radeon_fence_emit(rdev, fence, ring->idx);
3610         if (r) {
3611                 radeon_ring_unlock_undo(rdev, ring);
3612                 return r;
3613         }
3614
3615         radeon_ring_unlock_commit(rdev, ring);
3616         radeon_semaphore_free(rdev, &sem, *fence);
3617
3618         return r;
3619 }
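
/*
 * Usage sketch (illustrative only): callers typically hand in a fence
 * pointer and block on the returned fence before touching the copied
 * pages.  Note the sizing above: a copy is split into
 * DIV_ROUND_UP(bytes, 0x1fffff) linear-copy packets of 7 dwords each,
 * plus up to 14 dwords of semaphore/fence overhead, which is exactly
 * the ring space reserved.
 */
static __unused int cik_copy_dma_sync(struct radeon_device *rdev,
                                      uint64_t src, uint64_t dst,
                                      unsigned num_gpu_pages)
{
        struct radeon_fence *fence = NULL;
        int r;

        r = cik_copy_dma(rdev, src, dst, num_gpu_pages, &fence);
        if (r)
                return r;
        /* block (non-interruptibly) until the sDMA engine signals */
        r = radeon_fence_wait(fence, false);
        radeon_fence_unref(&fence);
        return r;
}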
3620
3621 /**
3622  * cik_sdma_ring_test - simple async dma engine test
3623  *
3624  * @rdev: radeon_device pointer
3625  * @ring: radeon_ring structure holding ring information
3626  *
3627  * Test the DMA engine by using it to write a value
3628  * to memory (CIK).
3629  * Returns 0 for success, error for failure.
3630  */
3631 int cik_sdma_ring_test(struct radeon_device *rdev,
3632                        struct radeon_ring *ring)
3633 {
3634         unsigned i;
3635         int r;
3636         volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3637         u32 tmp;
3638
3639         if (!ptr) {
3640                 DRM_ERROR("invalid vram scratch pointer\n");
3641                 return -EINVAL;
3642         }
3643
3644         tmp = 0xCAFEDEAD;
3645         writel(tmp, ptr);
3646
3647         r = radeon_ring_lock(rdev, ring, 4);
3648         if (r) {
3649                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3650                 return r;
3651         }
3652         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3653         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3654         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3655         radeon_ring_write(ring, 1); /* number of DWs to follow */
3656         radeon_ring_write(ring, 0xDEADBEEF);
3657         radeon_ring_unlock_commit(rdev, ring);
3658
3659         for (i = 0; i < rdev->usec_timeout; i++) {
3660                 tmp = readl(ptr);
3661                 if (tmp == 0xDEADBEEF)
3662                         break;
3663                 DRM_UDELAY(1);
3664         }
3665
3666         if (i < rdev->usec_timeout) {
3667                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3668         } else {
3669                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3670                           ring->idx, tmp);
3671                 r = -EINVAL;
3672         }
3673         return r;
3674 }
3675
3676 /**
3677  * cik_sdma_ib_test - test an IB on the DMA engine
3678  *
3679  * @rdev: radeon_device pointer
3680  * @ring: radeon_ring structure holding ring information
3681  *
3682  * Test a simple IB in the DMA ring (CIK).
3683  * Returns 0 on success, error on failure.
3684  */
3685 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3686 {
3687         struct radeon_ib ib;
3688         unsigned i;
3689         int r;
3690         volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3691         u32 tmp = 0;
3692
3693         if (!ptr) {
3694                 DRM_ERROR("invalid vram scratch pointer\n");
3695                 return -EINVAL;
3696         }
3697
3698         tmp = 0xCAFEDEAD;
3699         writel(tmp, ptr);
3700
3701         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3702         if (r) {
3703                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3704                 return r;
3705         }
3706
3707         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3708         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3709         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3710         ib.ptr[3] = 1;
3711         ib.ptr[4] = 0xDEADBEEF;
3712         ib.length_dw = 5;
3713
3714         r = radeon_ib_schedule(rdev, &ib, NULL);
3715         if (r) {
3716                 radeon_ib_free(rdev, &ib);
3717                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3718                 return r;
3719         }
3720         r = radeon_fence_wait(ib.fence, false);
3721         if (r) {
3722                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3723                 return r;
3724         }
3725         for (i = 0; i < rdev->usec_timeout; i++) {
3726                 tmp = readl(ptr);
3727                 if (tmp == 0xDEADBEEF)
3728                         break;
3729                 DRM_UDELAY(1);
3730         }
3731         if (i < rdev->usec_timeout) {
3732                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3733         } else {
3734                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3735                 r = -EINVAL;
3736         }
3737         radeon_ib_free(rdev, &ib);
3738         return r;
3739 }
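
/*
 * Both tests above use the same handshake: seed the VRAM scratch word
 * with 0xCAFEDEAD, ask the engine to overwrite it with 0xDEADBEEF, and
 * poll until the write lands.  A sketch of that poll, factored out
 * (illustrative only; the driver keeps it inline):
 */
static __unused int cik_sdma_poll_scratch(struct radeon_device *rdev)
{
        volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
        unsigned i;

        for (i = 0; i < rdev->usec_timeout; i++) {
                if (readl(ptr) == 0xDEADBEEF)
                        return 0;       /* engine write observed */
                DRM_UDELAY(1);
        }
        return -ETIMEDOUT;              /* never saw the write */
}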
3740
3741
3742 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3743 {
3744         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3745                 RREG32(GRBM_STATUS));
3746         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3747                 RREG32(GRBM_STATUS2));
3748         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3749                 RREG32(GRBM_STATUS_SE0));
3750         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3751                 RREG32(GRBM_STATUS_SE1));
3752         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3753                 RREG32(GRBM_STATUS_SE2));
3754         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3755                 RREG32(GRBM_STATUS_SE3));
3756         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3757                 RREG32(SRBM_STATUS));
3758         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3759                 RREG32(SRBM_STATUS2));
3760         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3761                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3762         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3763                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3764         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3765         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3766                  RREG32(CP_STALLED_STAT1));
3767         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3768                  RREG32(CP_STALLED_STAT2));
3769         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3770                  RREG32(CP_STALLED_STAT3));
3771         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3772                  RREG32(CP_CPF_BUSY_STAT));
3773         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3774                  RREG32(CP_CPF_STALLED_STAT1));
3775         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3776         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3777         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3778                  RREG32(CP_CPC_STALLED_STAT1));
3779         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3780 }
3781
3782 /**
3783  * cik_gpu_check_soft_reset - check which blocks are busy
3784  *
3785  * @rdev: radeon_device pointer
3786  *
3787  * Check which blocks are busy and return the relevant reset
3788  * mask to be used by cik_gpu_soft_reset().
3789  * Returns a mask of the blocks to be reset.
3790  */
3791 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3792 {
3793         u32 reset_mask = 0;
3794         u32 tmp;
3795
3796         /* GRBM_STATUS */
3797         tmp = RREG32(GRBM_STATUS);
3798         if (tmp & (PA_BUSY | SC_BUSY |
3799                    BCI_BUSY | SX_BUSY |
3800                    TA_BUSY | VGT_BUSY |
3801                    DB_BUSY | CB_BUSY |
3802                    GDS_BUSY | SPI_BUSY |
3803                    IA_BUSY | IA_BUSY_NO_DMA))
3804                 reset_mask |= RADEON_RESET_GFX;
3805
3806         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3807                 reset_mask |= RADEON_RESET_CP;
3808
3809         /* GRBM_STATUS2 */
3810         tmp = RREG32(GRBM_STATUS2);
3811         if (tmp & RLC_BUSY)
3812                 reset_mask |= RADEON_RESET_RLC;
3813
3814         /* SDMA0_STATUS_REG */
3815         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3816         if (!(tmp & SDMA_IDLE))
3817                 reset_mask |= RADEON_RESET_DMA;
3818
3819         /* SDMA1_STATUS_REG */
3820         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3821         if (!(tmp & SDMA_IDLE))
3822                 reset_mask |= RADEON_RESET_DMA1;
3823
3824         /* SRBM_STATUS2 */
3825         tmp = RREG32(SRBM_STATUS2);
3826         if (tmp & SDMA_BUSY)
3827                 reset_mask |= RADEON_RESET_DMA;
3828
3829         if (tmp & SDMA1_BUSY)
3830                 reset_mask |= RADEON_RESET_DMA1;
3831
3832         /* SRBM_STATUS */
3833         tmp = RREG32(SRBM_STATUS);
3834
3835         if (tmp & IH_BUSY)
3836                 reset_mask |= RADEON_RESET_IH;
3837
3838         if (tmp & SEM_BUSY)
3839                 reset_mask |= RADEON_RESET_SEM;
3840
3841         if (tmp & GRBM_RQ_PENDING)
3842                 reset_mask |= RADEON_RESET_GRBM;
3843
3844         if (tmp & VMC_BUSY)
3845                 reset_mask |= RADEON_RESET_VMC;
3846
3847         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3848                    MCC_BUSY | MCD_BUSY))
3849                 reset_mask |= RADEON_RESET_MC;
3850
3851         if (evergreen_is_display_hung(rdev))
3852                 reset_mask |= RADEON_RESET_DISPLAY;
3853
3854         /* Skip MC reset as it's most likely not hung, just busy */
3855         if (reset_mask & RADEON_RESET_MC) {
3856                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3857                 reset_mask &= ~RADEON_RESET_MC;
3858         }
3859
3860         return reset_mask;
3861 }
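
/*
 * Illustrative helper (a sketch, not used by the reset path): decode a
 * mask from cik_gpu_check_soft_reset() into a readable log line.  Only
 * a few representative bits are shown.
 */
static __unused void cik_log_reset_mask(struct radeon_device *rdev, u32 mask)
{
        dev_info(rdev->dev, "reset mask 0x%08X:%s%s%s%s\n", mask,
                 (mask & RADEON_RESET_GFX) ? " GFX" : "",
                 (mask & RADEON_RESET_CP) ? " CP" : "",
                 (mask & RADEON_RESET_DMA) ? " DMA" : "",
                 (mask & RADEON_RESET_DMA1) ? " DMA1" : "");
}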
3862
3863 /**
3864  * cik_gpu_soft_reset - soft reset GPU
3865  *
3866  * @rdev: radeon_device pointer
3867  * @reset_mask: mask of which blocks to reset
3868  *
3869  * Soft reset the blocks specified in @reset_mask.
3870  */
3871 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3872 {
3873         struct evergreen_mc_save save;
3874         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3875         u32 tmp;
3876
3877         if (reset_mask == 0)
3878                 return;
3879
3880         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3881
3882         cik_print_gpu_status_regs(rdev);
3883         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3884                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3885         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3886                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3887
3888         /* stop the rlc */
3889         cik_rlc_stop(rdev);
3890
3891         /* Disable GFX parsing/prefetching */
3892         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3893
3894         /* Disable MEC parsing/prefetching */
3895         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3896
3897         if (reset_mask & RADEON_RESET_DMA) {
3898                 /* sdma0 */
3899                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3900                 tmp |= SDMA_HALT;
3901                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3902         }
3903         if (reset_mask & RADEON_RESET_DMA1) {
3904                 /* sdma1 */
3905                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3906                 tmp |= SDMA_HALT;
3907                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3908         }
3909
3910         evergreen_mc_stop(rdev, &save);
3911         if (evergreen_mc_wait_for_idle(rdev)) {
3912                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3913         }
3914
3915         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3916                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3917
3918         if (reset_mask & RADEON_RESET_CP) {
3919                 grbm_soft_reset |= SOFT_RESET_CP;
3920
3921                 srbm_soft_reset |= SOFT_RESET_GRBM;
3922         }
3923
3924         if (reset_mask & RADEON_RESET_DMA)
3925                 srbm_soft_reset |= SOFT_RESET_SDMA;
3926
3927         if (reset_mask & RADEON_RESET_DMA1)
3928                 srbm_soft_reset |= SOFT_RESET_SDMA1;
3929
3930         if (reset_mask & RADEON_RESET_DISPLAY)
3931                 srbm_soft_reset |= SOFT_RESET_DC;
3932
3933         if (reset_mask & RADEON_RESET_RLC)
3934                 grbm_soft_reset |= SOFT_RESET_RLC;
3935
3936         if (reset_mask & RADEON_RESET_SEM)
3937                 srbm_soft_reset |= SOFT_RESET_SEM;
3938
3939         if (reset_mask & RADEON_RESET_IH)
3940                 srbm_soft_reset |= SOFT_RESET_IH;
3941
3942         if (reset_mask & RADEON_RESET_GRBM)
3943                 srbm_soft_reset |= SOFT_RESET_GRBM;
3944
3945         if (reset_mask & RADEON_RESET_VMC)
3946                 srbm_soft_reset |= SOFT_RESET_VMC;
3947
3948         if (!(rdev->flags & RADEON_IS_IGP)) {
3949                 if (reset_mask & RADEON_RESET_MC)
3950                         srbm_soft_reset |= SOFT_RESET_MC;
3951         }
3952
3953         if (grbm_soft_reset) {
3954                 tmp = RREG32(GRBM_SOFT_RESET);
3955                 tmp |= grbm_soft_reset;
3956                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3957                 WREG32(GRBM_SOFT_RESET, tmp);
3958                 tmp = RREG32(GRBM_SOFT_RESET);
3959
3960                 udelay(50);
3961
3962                 tmp &= ~grbm_soft_reset;
3963                 WREG32(GRBM_SOFT_RESET, tmp);
3964                 tmp = RREG32(GRBM_SOFT_RESET);
3965         }
3966
3967         if (srbm_soft_reset) {
3968                 tmp = RREG32(SRBM_SOFT_RESET);
3969                 tmp |= srbm_soft_reset;
3970                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3971                 WREG32(SRBM_SOFT_RESET, tmp);
3972                 tmp = RREG32(SRBM_SOFT_RESET);
3973
3974                 udelay(50);
3975
3976                 tmp &= ~srbm_soft_reset;
3977                 WREG32(SRBM_SOFT_RESET, tmp);
3978                 tmp = RREG32(SRBM_SOFT_RESET);
3979         }
3980
3981         /* Wait a little for things to settle down */
3982         udelay(50);
3983
3984         evergreen_mc_resume(rdev, &save);
3985         udelay(50);
3986
3987         cik_print_gpu_status_regs(rdev);
3988 }
3989
3990 /**
3991  * cik_asic_reset - soft reset GPU
3992  *
3993  * @rdev: radeon_device pointer
3994  *
3995  * Look up which blocks are hung and attempt
3996  * to reset them.
3997  * Returns 0 for success.
3998  */
3999 int cik_asic_reset(struct radeon_device *rdev)
4000 {
4001         u32 reset_mask;
4002
4003         reset_mask = cik_gpu_check_soft_reset(rdev);
4004
4005         if (reset_mask)
4006                 r600_set_bios_scratch_engine_hung(rdev, true);
4007
4008         cik_gpu_soft_reset(rdev, reset_mask);
4009
4010         reset_mask = cik_gpu_check_soft_reset(rdev);
4011
4012         if (!reset_mask)
4013                 r600_set_bios_scratch_engine_hung(rdev, false);
4014
4015         return 0;
4016 }
4017
4018 /**
4019  * cik_gfx_is_lockup - check if the 3D engine is locked up
4020  *
4021  * @rdev: radeon_device pointer
4022  * @ring: radeon_ring structure holding ring information
4023  *
4024  * Check if the 3D engine is locked up (CIK).
4025  * Returns true if the engine is locked, false if not.
4026  */
4027 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4028 {
4029         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4030
4031         if (!(reset_mask & (RADEON_RESET_GFX |
4032                             RADEON_RESET_COMPUTE |
4033                             RADEON_RESET_CP))) {
4034                 radeon_ring_lockup_update(ring);
4035                 return false;
4036         }
4037         /* force CP activities */
4038         radeon_ring_force_activity(rdev, ring);
4039         return radeon_ring_test_lockup(rdev, ring);
4040 }
4041
4042 /**
4043  * cik_sdma_is_lockup - check if the DMA engine is locked up
4044  *
4045  * @rdev: radeon_device pointer
4046  * @ring: radeon_ring structure holding ring information
4047  *
4048  * Check if the async DMA engine is locked up (CIK).
4049  * Returns true if the engine appears to be locked up, false if not.
4050  */
4051 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4052 {
4053         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4054         u32 mask;
4055
4056         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4057                 mask = RADEON_RESET_DMA;
4058         else
4059                 mask = RADEON_RESET_DMA1;
4060
4061         if (!(reset_mask & mask)) {
4062                 radeon_ring_lockup_update(ring);
4063                 return false;
4064         }
4065         /* force ring activities */
4066         radeon_ring_force_activity(rdev, ring);
4067         return radeon_ring_test_lockup(rdev, ring);
4068 }
4069
4070 /* MC */
4071 /**
4072  * cik_mc_program - program the GPU memory controller
4073  *
4074  * @rdev: radeon_device pointer
4075  *
4076  * Set the location of vram, gart, and AGP in the GPU's
4077  * physical address space (CIK).
4078  */
4079 static __unused void cik_mc_program(struct radeon_device *rdev)
4080 {
4081         struct evergreen_mc_save save;
4082         u32 tmp;
4083         int i, j;
4084
4085         /* Initialize HDP */
4086         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4087                 WREG32((0x2c14 + j), 0x00000000);
4088                 WREG32((0x2c18 + j), 0x00000000);
4089                 WREG32((0x2c1c + j), 0x00000000);
4090                 WREG32((0x2c20 + j), 0x00000000);
4091                 WREG32((0x2c24 + j), 0x00000000);
4092         }
4093         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4094
4095         evergreen_mc_stop(rdev, &save);
4096         if (radeon_mc_wait_for_idle(rdev)) {
4097                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4098         }
4099         /* Lockout access through VGA aperture*/
4100         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4101         /* Update configuration */
4102         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4103                rdev->mc.vram_start >> 12);
4104         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4105                rdev->mc.vram_end >> 12);
4106         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4107                rdev->vram_scratch.gpu_addr >> 12);
4108         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4109         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4110         WREG32(MC_VM_FB_LOCATION, tmp);
4111         /* XXX double check these! */
4112         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4113         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4114         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4115         WREG32(MC_VM_AGP_BASE, 0);
4116         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4117         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4118         if (radeon_mc_wait_for_idle(rdev)) {
4119                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4120         }
4121         evergreen_mc_resume(rdev, &save);
4122         /* we need to own VRAM, so turn off the VGA renderer here
4123          * to stop it overwriting our objects */
4124         rv515_vga_render_disable(rdev);
4125 }
4126
4127 /**
4128  * cik_mc_init - initialize the memory controller driver params
4129  *
4130  * @rdev: radeon_device pointer
4131  *
4132  * Look up the amount of vram, vram width, and decide how to place
4133  * vram and gart within the GPU's physical address space (CIK).
4134  * Returns 0 for success.
4135  */
4136 static __unused int cik_mc_init(struct radeon_device *rdev)
4137 {
4138         u32 tmp;
4139         int chansize, numchan;
4140
4141         /* Get VRAM information */
4142         rdev->mc.vram_is_ddr = true;
4143         tmp = RREG32(MC_ARB_RAMCFG);
4144         if (tmp & CHANSIZE_MASK) {
4145                 chansize = 64;
4146         } else {
4147                 chansize = 32;
4148         }
4149         tmp = RREG32(MC_SHARED_CHMAP);
4150         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4151         case 0:
4152         default:
4153                 numchan = 1;
4154                 break;
4155         case 1:
4156                 numchan = 2;
4157                 break;
4158         case 2:
4159                 numchan = 4;
4160                 break;
4161         case 3:
4162                 numchan = 8;
4163                 break;
4164         case 4:
4165                 numchan = 3;
4166                 break;
4167         case 5:
4168                 numchan = 6;
4169                 break;
4170         case 6:
4171                 numchan = 10;
4172                 break;
4173         case 7:
4174                 numchan = 12;
4175                 break;
4176         case 8:
4177                 numchan = 16;
4178                 break;
4179         }
4180         rdev->mc.vram_width = numchan * chansize;
4181         /* Could aper size report 0 ? */
4182         rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4183         rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4184         /* size in MB on CIK; use 64-bit math so boards with 4GB or more don't overflow */
4185         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4186         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4187         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4188         si_vram_gtt_location(rdev, &rdev->mc);
4189         radeon_update_bandwidth_info(rdev);
4190
4191         return 0;
4192 }
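
/*
 * Worked example: with CHANSIZE set (64-bit channels) and a NOOFCHAN
 * field of 3, the decode above gives numchan = 8, so
 * vram_width = 8 * 64 = 512 bits.
 */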
4193
4194 /*
4195  * GART
4196  * VMID 0 covers the physical GPU addresses used by the kernel.
4197  * VMIDs 1-15 are used for userspace clients and are handled
4198  * by the radeon vm/hsa code.
4199  */
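
/*
 * Sketch (illustrative only): the per-VMID page table base registers
 * are split across two banks, VM_CONTEXT0..7 and VM_CONTEXT8..15,
 * which is why the setup and flush paths below branch on vmid < 8.
 */
static __unused u32 cik_vm_pt_base_reg(unsigned vmid)
{
        if (vmid < 8)
                return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
        return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}
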
4200 /**
4201  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4202  *
4203  * @rdev: radeon_device pointer
4204  *
4205  * Flush the TLB for the VMID 0 page table (CIK).
4206  */
4207 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4208 {
4209         /* flush hdp cache */
4210         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4211
4212         /* bits 0-15 are the VM contexts 0-15 */
4213         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4214 }
4215
4216 /**
4217  * cik_pcie_gart_enable - gart enable
4218  *
4219  * @rdev: radeon_device pointer
4220  *
4221  * This sets up the TLBs, programs the page tables for VMID0,
4222  * sets up the hw for VMIDs 1-15 which are allocated on
4223  * demand, and sets up the global locations for the LDS, GDS,
4224  * and GPUVM for FSA64 clients (CIK).
4225  * Returns 0 for success, errors for failure.
4226  */
4227 static __unused int cik_pcie_gart_enable(struct radeon_device *rdev)
4228 {
4229         int r, i;
4230
4231         if (rdev->gart.robj == NULL) {
4232                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4233                 return -EINVAL;
4234         }
4235         r = radeon_gart_table_vram_pin(rdev);
4236         if (r)
4237                 return r;
4238         radeon_gart_restore(rdev);
4239         /* Setup TLB control */
4240         WREG32(MC_VM_MX_L1_TLB_CNTL,
4241                (0xA << 7) |
4242                ENABLE_L1_TLB |
4243                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4244                ENABLE_ADVANCED_DRIVER_MODEL |
4245                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4246         /* Setup L2 cache */
4247         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4248                ENABLE_L2_FRAGMENT_PROCESSING |
4249                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4250                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4251                EFFECTIVE_L2_QUEUE_SIZE(7) |
4252                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4253         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4254         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4255                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4256         /* setup context0 */
4257         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4258         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4259         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4260         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4261                         (u32)(rdev->dummy_page.addr >> 12));
4262         WREG32(VM_CONTEXT0_CNTL2, 0);
4263         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4264                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4265
4266         WREG32(0x15D4, 0);
4267         WREG32(0x15D8, 0);
4268         WREG32(0x15DC, 0);
4269
4270         /* empty context1-15 */
4271         /* FIXME start with 4G, once using 2 level pt switch to full
4272          * vm size space
4273          */
4274         /* set vm size, must be a multiple of 4 */
4275         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4276         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4277         for (i = 1; i < 16; i++) {
4278                 if (i < 8)
4279                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4280                                rdev->gart.table_addr >> 12);
4281                 else
4282                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4283                                rdev->gart.table_addr >> 12);
4284         }
4285
4286         /* enable context1-15 */
4287         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4288                (u32)(rdev->dummy_page.addr >> 12));
4289         WREG32(VM_CONTEXT1_CNTL2, 4);
4290         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4291                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4292                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4293                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4294                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4295                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4296                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4297                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4298                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4299                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4301                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4303
4304         /* TC cache setup ??? */
4305         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4306         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4307         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4308
4309         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4310         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4311         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4312         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4313         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4314
4315         WREG32(TC_CFG_L1_VOLATILE, 0);
4316         WREG32(TC_CFG_L2_VOLATILE, 0);
4317
4318         if (rdev->family == CHIP_KAVERI) {
4319                 u32 tmp = RREG32(CHUB_CONTROL);
4320                 tmp &= ~BYPASS_VM;
4321                 WREG32(CHUB_CONTROL, tmp);
4322         }
4323
4324         /* XXX SH_MEM regs */
4325         /* where to put LDS, scratch, GPUVM in FSA64 space */
4326         spin_lock(&rdev->srbm_mutex);
4327         for (i = 0; i < 16; i++) {
4328                 cik_srbm_select(rdev, 0, 0, 0, i);
4329                 /* CP and shaders */
4330                 WREG32(SH_MEM_CONFIG, 0);
4331                 WREG32(SH_MEM_APE1_BASE, 1);
4332                 WREG32(SH_MEM_APE1_LIMIT, 0);
4333                 WREG32(SH_MEM_BASES, 0);
4334                 /* SDMA GFX */
4335                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4336                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4337                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4338                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4339                 /* XXX SDMA RLC - todo */
4340         }
4341         cik_srbm_select(rdev, 0, 0, 0, 0);
4342         spin_unlock(&rdev->srbm_mutex);
4343
4344         cik_pcie_gart_tlb_flush(rdev);
4345         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4346                  (unsigned)(rdev->mc.gtt_size >> 20),
4347                  (unsigned long long)rdev->gart.table_addr);
4348         rdev->gart.ready = true;
4349         return 0;
4350 }
4351
4352 /**
4353  * cik_pcie_gart_disable - gart disable
4354  *
4355  * @rdev: radeon_device pointer
4356  *
4357  * This disables all VM page tables (CIK).
4358  */
4359 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4360 {
4361         /* Disable all tables */
4362         WREG32(VM_CONTEXT0_CNTL, 0);
4363         WREG32(VM_CONTEXT1_CNTL, 0);
4364         /* Setup TLB control */
4365         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4366                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4367         /* Setup L2 cache */
4368         WREG32(VM_L2_CNTL,
4369                ENABLE_L2_FRAGMENT_PROCESSING |
4370                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4371                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4372                EFFECTIVE_L2_QUEUE_SIZE(7) |
4373                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4374         WREG32(VM_L2_CNTL2, 0);
4375         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4376                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4377         radeon_gart_table_vram_unpin(rdev);
4378 }
4379
4380 /**
4381  * cik_pcie_gart_fini - vm fini callback
4382  *
4383  * @rdev: radeon_device pointer
4384  *
4385  * Tears down the driver GART/VM setup (CIK).
4386  */
4387 static __unused void cik_pcie_gart_fini(struct radeon_device *rdev)
4388 {
4389         cik_pcie_gart_disable(rdev);
4390         radeon_gart_table_vram_free(rdev);
4391         radeon_gart_fini(rdev);
4392 }
4393
4394 /* vm parser */
4395 /**
4396  * cik_ib_parse - vm ib_parse callback
4397  *
4398  * @rdev: radeon_device pointer
4399  * @ib: indirect buffer pointer
4400  *
4401  * CIK uses hw IB checking so this is a nop (CIK).
4402  */
4403 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4404 {
4405         return 0;
4406 }
4407
4408 /*
4409  * vm
4410  * VMID 0 covers the physical GPU addresses used by the kernel.
4411  * VMIDs 1-15 are used for userspace clients and are handled
4412  * by the radeon vm/hsa code.
4413  */
4414 /**
4415  * cik_vm_init - cik vm init callback
4416  *
4417  * @rdev: radeon_device pointer
4418  *
4419  * Inits cik specific vm parameters (number of VMs, base of vram for
4420  * VMIDs 1-15) (CIK).
4421  * Returns 0 for success.
4422  */
4423 int cik_vm_init(struct radeon_device *rdev)
4424 {
4425         /* number of VMs */
4426         rdev->vm_manager.nvm = 16;
4427         /* base offset of vram pages */
4428         if (rdev->flags & RADEON_IS_IGP) {
4429                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4430                 tmp <<= 22;
4431                 rdev->vm_manager.vram_base_offset = tmp;
4432         } else
4433                 rdev->vm_manager.vram_base_offset = 0;
4434
4435         return 0;
4436 }
4437
4438 /**
4439  * cik_vm_fini - cik vm fini callback
4440  *
4441  * @rdev: radeon_device pointer
4442  *
4443  * Tear down any asic specific VM setup (CIK).
4444  */
4445 void cik_vm_fini(struct radeon_device *rdev)
4446 {
4447 }
4448
4449 /**
4450  * cik_vm_decode_fault - print human readable fault info
4451  *
4452  * @rdev: radeon_device pointer
4453  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4454  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4455  *
4456  * Print human readable fault information (CIK).
4457  */
4458 static void cik_vm_decode_fault(struct radeon_device *rdev,
4459                                 u32 status, u32 addr, u32 mc_client)
4460 {
4461         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4462         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4463         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4464         char *block = (char *)&mc_client;
4465
4466         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4467                protections, vmid, addr,
4468                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4469                block, mc_id);
4470 }
4471
4472 /**
4473  * cik_vm_flush - cik vm flush using the CP
4474  *
4475  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4476  *
4477  * Update the page table base and flush the VM TLB
4478  * using the CP (CIK).
4479  */
4480 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4481 {
4482         struct radeon_ring *ring = &rdev->ring[ridx];
4483
4484         if (vm == NULL)
4485                 return;
4486
4487         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4488         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4489                                  WRITE_DATA_DST_SEL(0)));
4490         if (vm->id < 8) {
4491                 radeon_ring_write(ring,
4492                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4493         } else {
4494                 radeon_ring_write(ring,
4495                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4496         }
4497         radeon_ring_write(ring, 0);
4498         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4499
4500         /* update SH_MEM_* regs */
4501         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4502         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4503                                  WRITE_DATA_DST_SEL(0)));
4504         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4505         radeon_ring_write(ring, 0);
4506         radeon_ring_write(ring, VMID(vm->id));
4507
4508         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4509         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4510                                  WRITE_DATA_DST_SEL(0)));
4511         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4512         radeon_ring_write(ring, 0);
4513
4514         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4515         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4516         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4517         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4518
4519         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4520         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4521                                  WRITE_DATA_DST_SEL(0)));
4522         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4523         radeon_ring_write(ring, 0);
4524         radeon_ring_write(ring, VMID(0));
4525
4526         /* HDP flush */
4527         /* We should be using the WAIT_REG_MEM packet here like in
4528          * cik_fence_ring_emit(), but it causes the CP to hang in this
4529          * context...
4530          */
4531         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4532         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4533                                  WRITE_DATA_DST_SEL(0)));
4534         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4535         radeon_ring_write(ring, 0);
4536         radeon_ring_write(ring, 0);
4537
4538         /* bits 0-15 are the VM contexts 0-15 */
4539         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4540         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4541                                  WRITE_DATA_DST_SEL(0)));
4542         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4543         radeon_ring_write(ring, 0);
4544         radeon_ring_write(ring, 1 << vm->id);
4545
4546         /* compute doesn't have PFP */
4547         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4548                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4549                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4550                 radeon_ring_write(ring, 0x0);
4551         }
4552 }
4553
4554 /**
4555  * cik_vm_set_page - update the page tables using CP or sDMA
4556  *
4557  * @rdev: radeon_device pointer
4558  * @ib: indirect buffer to fill with commands
4559  * @pe: addr of the page entry
4560  * @addr: dst addr to write into pe
4561  * @count: number of page entries to update
4562  * @incr: increase next addr by incr bytes
4563  * @flags: access flags
4564  *
4565  * Update the page tables using CP or sDMA (CIK).
4566  */
4567 void cik_vm_set_page(struct radeon_device *rdev,
4568                      struct radeon_ib *ib,
4569                      uint64_t pe,
4570                      uint64_t addr, unsigned count,
4571                      uint32_t incr, uint32_t flags)
4572 {
4573         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4574         uint64_t value;
4575         unsigned ndw;
4576
4577         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4578                 /* CP */
4579                 while (count) {
4580                         ndw = 2 + count * 2;
4581                         if (ndw > 0x3FFE)
4582                                 ndw = 0x3FFE;
4583
4584                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4585                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4586                                                     WRITE_DATA_DST_SEL(1));
4587                         ib->ptr[ib->length_dw++] = pe;
4588                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4589                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4590                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4591                                         value = radeon_vm_map_gart(rdev, addr);
4592                                         value &= 0xFFFFFFFFFFFFF000ULL;
4593                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4594                                         value = addr;
4595                                 } else {
4596                                         value = 0;
4597                                 }
4598                                 addr += incr;
4599                                 value |= r600_flags;
4600                                 ib->ptr[ib->length_dw++] = value;
4601                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4602                         }
4603                 }
4604         } else {
4605                 /* DMA */
4606                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4607                         while (count) {
4608                                 ndw = count * 2;
4609                                 if (ndw > 0xFFFFE)
4610                                         ndw = 0xFFFFE;
4611
4612                                 /* for non-physically contiguous pages (system) */
4613                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4614                                 ib->ptr[ib->length_dw++] = pe;
4615                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4616                                 ib->ptr[ib->length_dw++] = ndw;
4617                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4618                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4619                                                 value = radeon_vm_map_gart(rdev, addr);
4620                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4621                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4622                                                 value = addr;
4623                                         } else {
4624                                                 value = 0;
4625                                         }
4626                                         addr += incr;
4627                                         value |= r600_flags;
4628                                         ib->ptr[ib->length_dw++] = value;
4629                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4630                                 }
4631                         }
4632                 } else {
4633                         while (count) {
4634                                 ndw = count;
4635                                 if (ndw > 0x7FFFF)
4636                                         ndw = 0x7FFFF;
4637
4638                                 if (flags & RADEON_VM_PAGE_VALID)
4639                                         value = addr;
4640                                 else
4641                                         value = 0;
4642                                 /* for physically contiguous pages (vram) */
4643                                 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4644                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4645                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4646                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4647                                 ib->ptr[ib->length_dw++] = 0;
4648                                 ib->ptr[ib->length_dw++] = value; /* value */
4649                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4650                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4651                                 ib->ptr[ib->length_dw++] = 0;
4652                                 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4653                                 pe += ndw * 8;
4654                                 addr += ndw * incr;
4655                                 count -= ndw;
4656                         }
4657                 }
4658                 while (ib->length_dw & 0x7)
4659                         ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4660         }
4661 }
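
/*
 * Worked example (illustrative): updating 512 contiguous VRAM pages
 * with incr = 4096 fits in a single 10-dword GENERATE_PTE_PDE packet
 * (ndw = 512 <= 0x7FFFF), while the same 512 pages of system memory
 * need ndw = 1024 data dwords in one WRITE_LINEAR packet.  The
 * trailing NOP loop pads the IB to the 8-dword alignment the sDMA
 * engine expects.
 */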
4662
4663 /**
4664  * cik_dma_vm_flush - cik vm flush using sDMA
4665  *
4666  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4667  *
4668  * Update the page table base and flush the VM TLB
4669  * using sDMA (CIK).
4670  */
4671 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4672 {
4673         struct radeon_ring *ring = &rdev->ring[ridx];
4674         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4675                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4676         u32 ref_and_mask;
4677
4678         if (vm == NULL)
4679                 return;
4680
4681         if (ridx == R600_RING_TYPE_DMA_INDEX)
4682                 ref_and_mask = SDMA0;
4683         else
4684                 ref_and_mask = SDMA1;
4685
4686         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687         if (vm->id < 8) {
4688                 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4689         } else {
4690                 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4691         }
4692         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4693
4694         /* update SH_MEM_* regs */
4695         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4696         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4697         radeon_ring_write(ring, VMID(vm->id));
4698
4699         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4700         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4701         radeon_ring_write(ring, 0);
4702
4703         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4704         radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4705         radeon_ring_write(ring, 0);
4706
4707         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4708         radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4709         radeon_ring_write(ring, 1);
4710
4711         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712         radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4713         radeon_ring_write(ring, 0);
4714
4715         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4716         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4717         radeon_ring_write(ring, VMID(0));
4718
4719         /* flush HDP */
4720         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4721         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4722         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4723         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4724         radeon_ring_write(ring, ref_and_mask); /* MASK */
4725         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4726
4727         /* flush TLB */
4728         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4729         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4730         radeon_ring_write(ring, 1 << vm->id);
4731 }
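
/*
 * Sketch (illustrative only): each register update above is the same
 * three-dword SRBM_WRITE sequence, which could be wrapped as:
 */
static __unused void cik_sdma_srbm_write(struct radeon_ring *ring,
                                         u32 reg, u32 val)
{
        /* 0xf000 sets all four byte enables for the 32-bit write */
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        radeon_ring_write(ring, reg >> 2);      /* dword register index */
        radeon_ring_write(ring, val);
}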
4732
4733 /*
4734  * RLC
4735  * The RLC is a multi-purpose microengine that handles a
4736  * variety of functions, the most important of which is
4737  * the interrupt controller.
4738  */
4739 /**
4740  * cik_rlc_stop - stop the RLC ME
4741  *
4742  * @rdev: radeon_device pointer
4743  *
4744  * Halt the RLC ME (MicroEngine) (CIK).
4745  */
4746 static void cik_rlc_stop(struct radeon_device *rdev)
4747 {
4748         int i, j, k;
4749         u32 mask, tmp;
4750
4751         tmp = RREG32(CP_INT_CNTL_RING0);
4752         tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4753         WREG32(CP_INT_CNTL_RING0, tmp);
4754
4755         RREG32(CB_CGTT_SCLK_CTRL);
4756         RREG32(CB_CGTT_SCLK_CTRL);
4757         RREG32(CB_CGTT_SCLK_CTRL);
4758         RREG32(CB_CGTT_SCLK_CTRL);
4759
4760         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4761         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4762
4763         WREG32(RLC_CNTL, 0);
4764
4765         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4766                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4767                         cik_select_se_sh(rdev, i, j);
4768                         for (k = 0; k < rdev->usec_timeout; k++) {
4769                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4770                                         break;
4771                                 udelay(1);
4772                         }
4773                 }
4774         }
4775         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4776
4777         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4778         for (k = 0; k < rdev->usec_timeout; k++) {
4779                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4780                         break;
4781                 udelay(1);
4782         }
4783 }
4784
4785 /**
4786  * cik_rlc_start - start the RLC ME
4787  *
4788  * @rdev: radeon_device pointer
4789  *
4790  * Unhalt the RLC ME (MicroEngine) (CIK).
4791  */
4792 static void cik_rlc_start(struct radeon_device *rdev)
4793 {
4794         u32 tmp;
4795
4796         WREG32(RLC_CNTL, RLC_ENABLE);
4797
4798         tmp = RREG32(CP_INT_CNTL_RING0);
4799         tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4800         WREG32(CP_INT_CNTL_RING0, tmp);
4801
4802         udelay(50);
4803 }
4804
4805 /**
4806  * cik_rlc_resume - setup the RLC hw
4807  *
4808  * @rdev: radeon_device pointer
4809  *
4810  * Initialize the RLC registers, load the ucode,
4811  * and start the RLC (CIK).
4812  * Returns 0 for success, -EINVAL if the ucode is not available.
4813  */
4814 static __unused int cik_rlc_resume(struct radeon_device *rdev)
4815 {
4816         u32 i, size;
4817         u32 clear_state_info[3];
4818         const __be32 *fw_data;
4819
4820         if (!rdev->rlc_fw)
4821                 return -EINVAL;
4822
4823         switch (rdev->family) {
4824         case CHIP_BONAIRE:
4825         default:
4826                 size = BONAIRE_RLC_UCODE_SIZE;
4827                 break;
4828         case CHIP_KAVERI:
4829                 size = KV_RLC_UCODE_SIZE;
4830                 break;
4831         case CHIP_KABINI:
4832                 size = KB_RLC_UCODE_SIZE;
4833                 break;
4834         }
4835
4836         cik_rlc_stop(rdev);
4837
4838         WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4839         RREG32(GRBM_SOFT_RESET);
4840         udelay(50);
4841         WREG32(GRBM_SOFT_RESET, 0);
4842         RREG32(GRBM_SOFT_RESET);
4843         udelay(50);
4844
4845         WREG32(RLC_LB_CNTR_INIT, 0);
4846         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4847
4848         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4849         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4850         WREG32(RLC_LB_PARAMS, 0x00600408);
4851         WREG32(RLC_LB_CNTL, 0x80000004);
4852
4853         WREG32(RLC_MC_CNTL, 0);
4854         WREG32(RLC_UCODE_CNTL, 0);
4855
4856         fw_data = (const __be32 *)rdev->rlc_fw->data;
4857         WREG32(RLC_GPM_UCODE_ADDR, 0);
4858         for (i = 0; i < size; i++)
4859                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4860         WREG32(RLC_GPM_UCODE_ADDR, 0);
4861
4862         /* XXX */
4863         clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
4864         clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
4865         clear_state_info[2] = 0; /* cik_default_size */
4866         WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4867         for (i = 0; i < 3; i++)
4868                 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4869         WREG32(RLC_DRIVER_DMA_STATUS, 0);
4870
4871         cik_rlc_start(rdev);
4872
4873         return 0;
4874 }
4875
4876 /*
4877  * Interrupts
4878  * Starting with r6xx, interrupts are handled via a ring buffer.
4879  * Ring buffers are areas of GPU accessible memory that the GPU
4880  * writes interrupt vectors into and the host reads vectors out of.
4881  * There is a rptr (read pointer) that determines where the
4882  * host is currently reading, and a wptr (write pointer)
4883  * which determines where the GPU has written.  When the
4884  * pointers are equal, the ring is idle.  When the GPU
4885  * writes vectors to the ring buffer, it increments the
4886  * wptr.  When there is an interrupt, the host then starts
4887  * fetching commands and processing them until the pointers are
4888  * equal again at which point it updates the rptr.
4889  */
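
/*
 * Host-side sketch of that scheme (illustrative only; the driver's
 * real IRQ handler also deals with wptr writeback and ring overflow).
 * Each vector is 16 bytes, and the ring size is a power of two:
 */
static __unused void cik_ih_drain_sketch(struct radeon_device *rdev)
{
        u32 rptr = rdev->ih.rptr;
        u32 wptr = RREG32(IH_RB_WPTR);  /* how far the GPU has written */

        while (rptr != wptr) {
                /* decode/dispatch of the vector at rptr would go here */
                rptr = (rptr + 16) & (rdev->ih.ring_size - 1);
        }
        WREG32(IH_RB_RPTR, rptr);       /* tell the GPU we consumed them */
        rdev->ih.rptr = rptr;
}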
4890
4891 /**
4892  * cik_enable_interrupts - Enable the interrupt ring buffer
4893  *
4894  * @rdev: radeon_device pointer
4895  *
4896  * Enable the interrupt ring buffer (CIK).
4897  */
4898 static void cik_enable_interrupts(struct radeon_device *rdev)
4899 {
4900         u32 ih_cntl = RREG32(IH_CNTL);
4901         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4902
4903         ih_cntl |= ENABLE_INTR;
4904         ih_rb_cntl |= IH_RB_ENABLE;
4905         WREG32(IH_CNTL, ih_cntl);
4906         WREG32(IH_RB_CNTL, ih_rb_cntl);
4907         rdev->ih.enabled = true;
4908 }
4909
4910 /**
4911  * cik_disable_interrupts - Disable the interrupt ring buffer
4912  *
4913  * @rdev: radeon_device pointer
4914  *
4915  * Disable the interrupt ring buffer (CIK).
4916  */
4917 static void cik_disable_interrupts(struct radeon_device *rdev)
4918 {
4919         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4920         u32 ih_cntl = RREG32(IH_CNTL);
4921
4922         ih_rb_cntl &= ~IH_RB_ENABLE;
4923         ih_cntl &= ~ENABLE_INTR;
4924         WREG32(IH_RB_CNTL, ih_rb_cntl);
4925         WREG32(IH_CNTL, ih_cntl);
4926         /* set rptr, wptr to 0 */
4927         WREG32(IH_RB_RPTR, 0);
4928         WREG32(IH_RB_WPTR, 0);
4929         rdev->ih.enabled = false;
4930         rdev->ih.rptr = 0;
4931 }
4932
4933 /**
4934  * cik_disable_interrupt_state - Disable all interrupt sources
4935  *
4936  * @rdev: radeon_device pointer
4937  *
4938  * Clear all interrupt enable bits used by the driver (CIK).
4939  */
4940 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4941 {
4942         u32 tmp;
4943
4944         /* gfx ring */
4945         WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4946         /* sdma */
4947         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4948         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4949         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4950         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4951         /* compute queues */
4952         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4953         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4954         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4955         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4956         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4957         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4958         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4959         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4960         /* grbm */
4961         WREG32(GRBM_INT_CNTL, 0);
4962         /* vline/vblank, etc. */
4963         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4964         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4965         if (rdev->num_crtc >= 4) {
4966                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4967                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4968         }
4969         if (rdev->num_crtc >= 6) {
4970                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4971                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4972         }
4973
4974         /* dac hotplug */
4975         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4976
4977         /* digital hotplug */
4978         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4979         WREG32(DC_HPD1_INT_CONTROL, tmp);
4980         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4981         WREG32(DC_HPD2_INT_CONTROL, tmp);
4982         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4983         WREG32(DC_HPD3_INT_CONTROL, tmp);
4984         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4985         WREG32(DC_HPD4_INT_CONTROL, tmp);
4986         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4987         WREG32(DC_HPD5_INT_CONTROL, tmp);
4988         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4989         WREG32(DC_HPD6_INT_CONTROL, tmp);
4990
4991 }
4992
4993 /**
4994  * cik_irq_init - init and enable the interrupt ring
4995  *
4996  * @rdev: radeon_device pointer
4997  *
4998  * Allocate a ring buffer for the interrupt controller,
4999  * enable the RLC, set up and enable the IH ring buffer,
5000  * and enable interrupts (CIK).
5001  * Called at device load and resume.
5002  * Returns 0 for success, errors for failure.
5003  */
5004 static __unused int cik_irq_init(struct radeon_device *rdev)
5005 {
5006         int ret = 0;
5007         int rb_bufsz;
5008         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5009
5010         /* allocate ring */
5011         ret = r600_ih_ring_alloc(rdev);
5012         if (ret)
5013                 return ret;
5014
5015         /* disable irqs */
5016         cik_disable_interrupts(rdev);
5017
5018         /* init rlc */
5019         ret = cik_rlc_resume(rdev);
5020         if (ret) {
5021                 r600_ih_ring_fini(rdev);
5022                 return ret;
5023         }
5024
5025         /* setup interrupt control */
5026         /* XXX this should actually be a bus address, not an MC address. same on older asics */
5027         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5028         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5029         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5030          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5031          */
5032         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5033         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5034         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5035         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5036
5037         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
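        /* the hw wants the ring size as log2 of the dword count; drm_order()
         * computes that and the (rb_bufsz << 1) below shifts it into the
         * IH_RB_CNTL size field, just above the IH_RB_ENABLE bit.
         */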
5038         rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5039
5040         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5041                       IH_WPTR_OVERFLOW_CLEAR |
5042                       (rb_bufsz << 1));
5043
5044         if (rdev->wb.enabled)
5045                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5046
5047         /* set the writeback address whether it's enabled or not */
5048         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5049         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5050
5051         WREG32(IH_RB_CNTL, ih_rb_cntl);
5052
5053         /* set rptr, wptr to 0 */
5054         WREG32(IH_RB_RPTR, 0);
5055         WREG32(IH_RB_WPTR, 0);
5056
5057         /* Default settings for IH_CNTL (disabled at first) */
5058         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5059         /* RPTR_REARM only works if msi's are enabled */
5060         if (rdev->msi_enabled)
5061                 ih_cntl |= RPTR_REARM;
5062         WREG32(IH_CNTL, ih_cntl);
5063
5064         /* force the active interrupt state to all disabled */
5065         cik_disable_interrupt_state(rdev);
5066
5067         pci_enable_busmaster(rdev->dev);
5068
5069         /* enable irqs */
5070         cik_enable_interrupts(rdev);
5071
5072         return ret;
5073 }
5074
5075 /**
5076  * cik_irq_set - enable/disable interrupt sources
5077  *
5078  * @rdev: radeon_device pointer
5079  *
5080  * Enable interrupt sources on the GPU (vblanks, hpd,
5081  * etc.) (CIK).
5082  * Returns 0 for success, errors for failure.
5083  */
5084 int cik_irq_set(struct radeon_device *rdev)
5085 {
5086         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5087                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5088         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5089         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5090         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5091         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5092         u32 grbm_int_cntl = 0;
5093         u32 dma_cntl, dma_cntl1;
5094
5095         if (!rdev->irq.installed) {
5096                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5097                 return -EINVAL;
5098         }
5099         /* don't enable anything if the ih is disabled */
5100         if (!rdev->ih.enabled) {
5101                 cik_disable_interrupts(rdev);
5102                 /* force the active interrupt state to all disabled */
5103                 cik_disable_interrupt_state(rdev);
5104                 return 0;
5105         }
5106
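        /* Read each control register back with its enable bit cleared; the
         * requested sources are OR'd in below and everything is committed to
         * the hardware in a single batch of writes at the end.
         */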
5107         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5109         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5110         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5111         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5112         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5113
5114         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5115         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5116
5117         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5121         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5122         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5125
5126         /* enable CP interrupts on all rings */
5127         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5128                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5129                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5130         }
5131         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5132                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5133                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5134                 if (ring->me == 1) {
5135                         switch (ring->pipe) {
5136                         case 0:
5137                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5138                                 break;
5139                         case 1:
5140                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5141                                 break;
5142                         case 2:
5143                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5144                                 break;
5145                         case 3:
5146                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5147                                 break;
5148                         default:
5149                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5150                                 break;
5151                         }
5152                 } else if (ring->me == 2) {
5153                         switch (ring->pipe) {
5154                         case 0:
5155                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5156                                 break;
5157                         case 1:
5158                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5159                                 break;
5160                         case 2:
5161                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5162                                 break;
5163                         case 3:
5164                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5165                                 break;
5166                         default:
5167                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5168                                 break;
5169                         }
5170                 } else {
5171                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5172                 }
5173         }
5174         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5175                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5176                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5177                 if (ring->me == 1) {
5178                         switch (ring->pipe) {
5179                         case 0:
5180                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5181                                 break;
5182                         case 1:
5183                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5184                                 break;
5185                         case 2:
5186                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5187                                 break;
5188                         case 3:
5189                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5190                                 break;
5191                         default:
5192                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5193                                 break;
5194                         }
5195                 } else if (ring->me == 2) {
5196                         switch (ring->pipe) {
5197                         case 0:
5198                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5199                                 break;
5200                         case 1:
5201                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5202                                 break;
5203                         case 2:
5204                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5205                                 break;
5206                         case 3:
5207                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5208                                 break;
5209                         default:
5210                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5211                                 break;
5212                         }
5213                 } else {
5214                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5215                 }
5216         }
5217
5218         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5219                 DRM_DEBUG("cik_irq_set: sw int dma\n");
5220                 dma_cntl |= TRAP_ENABLE;
5221         }
5222
5223         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5224                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5225                 dma_cntl1 |= TRAP_ENABLE;
5226         }
5227
5228         if (rdev->irq.crtc_vblank_int[0] ||
5229             atomic_read(&rdev->irq.pflip[0])) {
5230                 DRM_DEBUG("cik_irq_set: vblank 0\n");
5231                 crtc1 |= VBLANK_INTERRUPT_MASK;
5232         }
5233         if (rdev->irq.crtc_vblank_int[1] ||
5234             atomic_read(&rdev->irq.pflip[1])) {
5235                 DRM_DEBUG("cik_irq_set: vblank 1\n");
5236                 crtc2 |= VBLANK_INTERRUPT_MASK;
5237         }
5238         if (rdev->irq.crtc_vblank_int[2] ||
5239             atomic_read(&rdev->irq.pflip[2])) {
5240                 DRM_DEBUG("cik_irq_set: vblank 2\n");
5241                 crtc3 |= VBLANK_INTERRUPT_MASK;
5242         }
5243         if (rdev->irq.crtc_vblank_int[3] ||
5244             atomic_read(&rdev->irq.pflip[3])) {
5245                 DRM_DEBUG("cik_irq_set: vblank 3\n");
5246                 crtc4 |= VBLANK_INTERRUPT_MASK;
5247         }
5248         if (rdev->irq.crtc_vblank_int[4] ||
5249             atomic_read(&rdev->irq.pflip[4])) {
5250                 DRM_DEBUG("cik_irq_set: vblank 4\n");
5251                 crtc5 |= VBLANK_INTERRUPT_MASK;
5252         }
5253         if (rdev->irq.crtc_vblank_int[5] ||
5254             atomic_read(&rdev->irq.pflip[5])) {
5255                 DRM_DEBUG("cik_irq_set: vblank 5\n");
5256                 crtc6 |= VBLANK_INTERRUPT_MASK;
5257         }
5258         if (rdev->irq.hpd[0]) {
5259                 DRM_DEBUG("cik_irq_set: hpd 1\n");
5260                 hpd1 |= DC_HPDx_INT_EN;
5261         }
5262         if (rdev->irq.hpd[1]) {
5263                 DRM_DEBUG("cik_irq_set: hpd 2\n");
5264                 hpd2 |= DC_HPDx_INT_EN;
5265         }
5266         if (rdev->irq.hpd[2]) {
5267                 DRM_DEBUG("cik_irq_set: hpd 3\n");
5268                 hpd3 |= DC_HPDx_INT_EN;
5269         }
5270         if (rdev->irq.hpd[3]) {
5271                 DRM_DEBUG("cik_irq_set: hpd 4\n");
5272                 hpd4 |= DC_HPDx_INT_EN;
5273         }
5274         if (rdev->irq.hpd[4]) {
5275                 DRM_DEBUG("cik_irq_set: hpd 5\n");
5276                 hpd5 |= DC_HPDx_INT_EN;
5277         }
5278         if (rdev->irq.hpd[5]) {
5279                 DRM_DEBUG("cik_irq_set: hpd 6\n");
5280                 hpd6 |= DC_HPDx_INT_EN;
5281         }
5282
5283         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5284
5285         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5286         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5287
5288         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5289         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5290         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5291         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5292         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5293         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5294         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5295         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5296
5297         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5298
5299         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5300         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5301         if (rdev->num_crtc >= 4) {
5302                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5303                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5304         }
5305         if (rdev->num_crtc >= 6) {
5306                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5307                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5308         }
5309
5310         WREG32(DC_HPD1_INT_CONTROL, hpd1);
5311         WREG32(DC_HPD2_INT_CONTROL, hpd2);
5312         WREG32(DC_HPD3_INT_CONTROL, hpd3);
5313         WREG32(DC_HPD4_INT_CONTROL, hpd4);
5314         WREG32(DC_HPD5_INT_CONTROL, hpd5);
5315         WREG32(DC_HPD6_INT_CONTROL, hpd6);
5316
5317         return 0;
5318 }
5319
5320 /**
5321  * cik_irq_ack - ack interrupt sources
5322  *
5323  * @rdev: radeon_device pointer
5324  *
5325  * Ack interrupt sources on the GPU (vblanks, hpd,
5326  * etc.) (CIK).  Certain interrupt sources are sw
5327  * generated and do not require an explicit ack.
5328  */
5329 static inline void cik_irq_ack(struct radeon_device *rdev)
5330 {
5331         u32 tmp;
5332
5333         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5334         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5335         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5336         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5337         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5338         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5339         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5340
5341         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5342                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5343         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5344                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5345         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5346                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5347         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5348                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5349
5350         if (rdev->num_crtc >= 4) {
5351                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5352                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5353                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5354                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5355                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5356                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5357                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5358                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5359         }
5360
5361         if (rdev->num_crtc >= 6) {
5362                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5363                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5364                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5365                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5366                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5367                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5368                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5369                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5370         }
5371
5372         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5373                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5374                 tmp |= DC_HPDx_INT_ACK;
5375                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5376         }
5377         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5378                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5379                 tmp |= DC_HPDx_INT_ACK;
5380                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5381         }
5382         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5383                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5384                 tmp |= DC_HPDx_INT_ACK;
5385                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5386         }
5387         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5388                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5389                 tmp |= DC_HPDx_INT_ACK;
5390                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5391         }
5392         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5393                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5394                 tmp |= DC_HPDx_INT_ACK;
5395                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5396         }
5397         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5398                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5399                 tmp |= DC_HPDx_INT_ACK;
5400                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5401         }
5402 }
5403
5404 /**
5405  * cik_irq_disable - disable interrupts
5406  *
5407  * @rdev: radeon_device pointer
5408  *
5409  * Disable interrupts on the hw (CIK).
5410  */
5411 static void cik_irq_disable(struct radeon_device *rdev)
5412 {
5413         cik_disable_interrupts(rdev);
5414         /* Wait and acknowledge irq */
5415         mdelay(1);
5416         cik_irq_ack(rdev);
5417         cik_disable_interrupt_state(rdev);
5418 }
5419
5420 /**
5421  * cik_irq_suspend - disable interrupts for suspend
5422  *
5423  * @rdev: radeon_device pointer
5424  *
5425  * Disable interrupts and stop the RLC (CIK).
5426  * Used for suspend.
5427  */
5428 static void cik_irq_suspend(struct radeon_device *rdev)
5429 {
5430         cik_irq_disable(rdev);
5431         cik_rlc_stop(rdev);
5432 }
5433
5434 /**
5435  * cik_irq_fini - tear down interrupt support
5436  *
5437  * @rdev: radeon_device pointer
5438  *
5439  * Disable interrupts on the hw and free the IH ring
5440  * buffer (CIK).
5441  * Used for driver unload.
5442  */
5443 static __unused void cik_irq_fini(struct radeon_device *rdev)
5444 {
5445         cik_irq_suspend(rdev);
5446         r600_ih_ring_fini(rdev);
5447 }
5448
5449 /**
5450  * cik_get_ih_wptr - get the IH ring buffer wptr
5451  *
5452  * @rdev: radeon_device pointer
5453  *
5454  * Get the IH ring buffer wptr from either the register
5455  * or the writeback memory buffer (CIK).  Also check for
5456  * ring buffer overflow and deal with it.
5457  * Used by cik_irq_process().
5458  * Returns the value of the wptr.
5459  */
5460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5461 {
5462         u32 wptr, tmp;
5463
5464         if (rdev->wb.enabled)
5465                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5466         else
5467                 wptr = RREG32(IH_RB_WPTR);
5468
5469         if (wptr & RB_OVERFLOW) {
5470                 /* When a ring buffer overflow happens, start parsing interrupts
5471                  * from the last vector that was not overwritten (wptr + 16).
5472                  * Hopefully this should allow us to catch up.
5473                  */
5474                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5475                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5476                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5477                 tmp = RREG32(IH_RB_CNTL);
5478                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5479                 WREG32(IH_RB_CNTL, tmp);
5480         }
5481         return (wptr & rdev->ih.ptr_mask);
5482 }
5483
5484 /* CIK IV Ring
5485  * Each IV ring entry is 128 bits:
5486  * [7:0]    - interrupt source id
5487  * [31:8]   - reserved
5488  * [59:32]  - interrupt source data
5489  * [63:60]  - reserved
5490  * [71:64]  - RINGID
5491  *            CP:
5492  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5493  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5494  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5495  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5496  *            PIPE_ID - ME0 0=3D
5497  *                    - ME1&2 compute dispatcher (4 pipes each)
5498  *            SDMA:
5499  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5500  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5501  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5502  * [79:72]  - VMID
5503  * [95:80]  - PASID
5504  * [127:96] - reserved
5505  */
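/* A decoding sketch for one IV entry, matching the masks used in
 * cik_irq_process() below (dw0..dw2 are the first three little-endian
 * dwords of the 128-bit entry):
 *
 *   src_id   = dw0 & 0xff;        // bits [7:0]
 *   src_data = dw1 & 0xfffffff;   // bits [59:32], 28 bits
 *   ring_id  = dw2 & 0xff;        // bits [71:64], the RINGID byte
 *
 * For CP sources, RINGID packs ME_ID in bits [6:5], PIPE_ID in [4:3]
 * and QUEUE_ID in [2:0]; for SDMA it packs QUEUE_ID in [3:2] and
 * INSTANCE_ID in [1:0].
 */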
5506 /**
5507  * cik_irq_process - interrupt handler
5508  *
5509  * @rdev: radeon_device pointer
5510  *
5511  * Interrupt handler (CIK).  Walk the IH ring,
5512  * ack interrupts and schedule work to handle
5513  * interrupt events.
5514  * Returns irq process return code.
5515  */
5516 irqreturn_t cik_irq_process(struct radeon_device *rdev)
5517 {
5518         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5519         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5520         u32 wptr;
5521         u32 rptr;
5522         u32 src_id, src_data, ring_id;
5523         u8 me_id, pipe_id, queue_id;
5524         u32 ring_index;
5525         bool queue_hotplug = false;
5526         bool queue_reset = false;
5527         u32 addr, status, mc_client;
5528
5529         if (!rdev->ih.enabled || rdev->shutdown)
5530                 return IRQ_NONE;
5531
5532         wptr = cik_get_ih_wptr(rdev);
5533
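        /* Only one context may walk the ring at a time: atomic_xchg() below
         * takes the ih lock.  After the lock is dropped at the bottom, the
         * wptr is sampled again and processing restarts here if more vectors
         * arrived in the meantime.
         */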
5534 restart_ih:
5535         /* is somebody else already processing irqs? */
5536         if (atomic_xchg(&rdev->ih.lock, 1))
5537                 return IRQ_NONE;
5538
5539         rptr = rdev->ih.rptr;
5540         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5541
5542         /* Order reading of wptr vs. reading of IH ring data */
5543         rmb();
5544
5545         /* display interrupts */
5546         cik_irq_ack(rdev);
5547
5548         while (rptr != wptr) {
5549                 /* wptr/rptr are in bytes! */
5550                 ring_index = rptr / 4;
5551                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5552                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5553                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5554
5555                 switch (src_id) {
5556                 case 1: /* D1 vblank/vline */
5557                         switch (src_data) {
5558                         case 0: /* D1 vblank */
5559                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5560                                         if (rdev->irq.crtc_vblank_int[0]) {
5561                                                 drm_handle_vblank(rdev->ddev, 0);
5562                                                 rdev->pm.vblank_sync = true;
5563                                                 wake_up(&rdev->irq.vblank_queue);
5564                                         }
5565                                         if (atomic_read(&rdev->irq.pflip[0]))
5566                                                 radeon_crtc_handle_flip(rdev, 0);
5567                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5568                                         DRM_DEBUG("IH: D1 vblank\n");
5569                                 }
5570                                 break;
5571                         case 1: /* D1 vline */
5572                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5573                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5574                                         DRM_DEBUG("IH: D1 vline\n");
5575                                 }
5576                                 break;
5577                         default:
5578                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5579                                 break;
5580                         }
5581                         break;
5582                 case 2: /* D2 vblank/vline */
5583                         switch (src_data) {
5584                         case 0: /* D2 vblank */
5585                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5586                                         if (rdev->irq.crtc_vblank_int[1]) {
5587                                                 drm_handle_vblank(rdev->ddev, 1);
5588                                                 rdev->pm.vblank_sync = true;
5589                                                 wake_up(&rdev->irq.vblank_queue);
5590                                         }
5591                                         if (atomic_read(&rdev->irq.pflip[1]))
5592                                                 radeon_crtc_handle_flip(rdev, 1);
5593                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5594                                         DRM_DEBUG("IH: D2 vblank\n");
5595                                 }
5596                                 break;
5597                         case 1: /* D2 vline */
5598                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5599                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5600                                         DRM_DEBUG("IH: D2 vline\n");
5601                                 }
5602                                 break;
5603                         default:
5604                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5605                                 break;
5606                         }
5607                         break;
5608                 case 3: /* D3 vblank/vline */
5609                         switch (src_data) {
5610                         case 0: /* D3 vblank */
5611                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5612                                         if (rdev->irq.crtc_vblank_int[2]) {
5613                                                 drm_handle_vblank(rdev->ddev, 2);
5614                                                 rdev->pm.vblank_sync = true;
5615                                                 wake_up(&rdev->irq.vblank_queue);
5616                                         }
5617                                         if (atomic_read(&rdev->irq.pflip[2]))
5618                                                 radeon_crtc_handle_flip(rdev, 2);
5619                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5620                                         DRM_DEBUG("IH: D3 vblank\n");
5621                                 }
5622                                 break;
5623                         case 1: /* D3 vline */
5624                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5625                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5626                                         DRM_DEBUG("IH: D3 vline\n");
5627                                 }
5628                                 break;
5629                         default:
5630                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5631                                 break;
5632                         }
5633                         break;
5634                 case 4: /* D4 vblank/vline */
5635                         switch (src_data) {
5636                         case 0: /* D4 vblank */
5637                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5638                                         if (rdev->irq.crtc_vblank_int[3]) {
5639                                                 drm_handle_vblank(rdev->ddev, 3);
5640                                                 rdev->pm.vblank_sync = true;
5641                                                 wake_up(&rdev->irq.vblank_queue);
5642                                         }
5643                                         if (atomic_read(&rdev->irq.pflip[3]))
5644                                                 radeon_crtc_handle_flip(rdev, 3);
5645                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5646                                         DRM_DEBUG("IH: D4 vblank\n");
5647                                 }
5648                                 break;
5649                         case 1: /* D4 vline */
5650                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5651                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5652                                         DRM_DEBUG("IH: D4 vline\n");
5653                                 }
5654                                 break;
5655                         default:
5656                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5657                                 break;
5658                         }
5659                         break;
5660                 case 5: /* D5 vblank/vline */
5661                         switch (src_data) {
5662                         case 0: /* D5 vblank */
5663                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5664                                         if (rdev->irq.crtc_vblank_int[4]) {
5665                                                 drm_handle_vblank(rdev->ddev, 4);
5666                                                 rdev->pm.vblank_sync = true;
5667                                                 wake_up(&rdev->irq.vblank_queue);
5668                                         }
5669                                         if (atomic_read(&rdev->irq.pflip[4]))
5670                                                 radeon_crtc_handle_flip(rdev, 4);
5671                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5672                                         DRM_DEBUG("IH: D5 vblank\n");
5673                                 }
5674                                 break;
5675                         case 1: /* D5 vline */
5676                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5677                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5678                                         DRM_DEBUG("IH: D5 vline\n");
5679                                 }
5680                                 break;
5681                         default:
5682                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5683                                 break;
5684                         }
5685                         break;
5686                 case 6: /* D6 vblank/vline */
5687                         switch (src_data) {
5688                         case 0: /* D6 vblank */
5689                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5690                                         if (rdev->irq.crtc_vblank_int[5]) {
5691                                                 drm_handle_vblank(rdev->ddev, 5);
5692                                                 rdev->pm.vblank_sync = true;
5693                                                 wake_up(&rdev->irq.vblank_queue);
5694                                         }
5695                                         if (atomic_read(&rdev->irq.pflip[5]))
5696                                                 radeon_crtc_handle_flip(rdev, 5);
5697                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5698                                         DRM_DEBUG("IH: D6 vblank\n");
5699                                 }
5700                                 break;
5701                         case 1: /* D6 vline */
5702                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5703                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5704                                         DRM_DEBUG("IH: D6 vline\n");
5705                                 }
5706                                 break;
5707                         default:
5708                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5709                                 break;
5710                         }
5711                         break;
5712                 case 42: /* HPD hotplug */
5713                         switch (src_data) {
5714                         case 0:
5715                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5716                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5717                                         queue_hotplug = true;
5718                                         DRM_DEBUG("IH: HPD1\n");
5719                                 }
5720                                 break;
5721                         case 1:
5722                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5723                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5724                                         queue_hotplug = true;
5725                                         DRM_DEBUG("IH: HPD2\n");
5726                                 }
5727                                 break;
5728                         case 2:
5729                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5730                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5731                                         queue_hotplug = true;
5732                                         DRM_DEBUG("IH: HPD3\n");
5733                                 }
5734                                 break;
5735                         case 3:
5736                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5737                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5738                                         queue_hotplug = true;
5739                                         DRM_DEBUG("IH: HPD4\n");
5740                                 }
5741                                 break;
5742                         case 4:
5743                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5744                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5745                                         queue_hotplug = true;
5746                                         DRM_DEBUG("IH: HPD5\n");
5747                                 }
5748                                 break;
5749                         case 5:
5750                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5751                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5752                                         queue_hotplug = true;
5753                                         DRM_DEBUG("IH: HPD6\n");
5754                                 }
5755                                 break;
5756                         default:
5757                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5758                                 break;
5759                         }
5760                         break;
5761                 case 146:
5762                 case 147:
5763                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5764                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5765                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5766                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5767                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5768                                 addr);
5769                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5770                                 status);
5771                         cik_vm_decode_fault(rdev, status, addr, mc_client);
5772                         /* reset addr and status */
5773                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5774                         break;
5775                 case 176: /* GFX RB CP_INT */
5776                 case 177: /* GFX IB CP_INT */
5777                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5778                         break;
5779                 case 181: /* CP EOP event */
5780                         DRM_DEBUG("IH: CP EOP\n");
5781                         /* XXX check the bitfield order! */
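                        /* ring_id is the RINGID byte of the IV entry: ME_ID in
                         * bits [6:5], PIPE_ID in [4:3], QUEUE_ID in [2:0]
                         * (see the IV ring layout above) */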
5782                         me_id = (ring_id & 0x60) >> 5;
5783                         pipe_id = (ring_id & 0x18) >> 3;
5784                         queue_id = (ring_id & 0x7) >> 0;
5785                         switch (me_id) {
5786                         case 0:
5787                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5788                                 break;
5789                         case 1:
5790                         case 2:
5791                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5792                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5793                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5794                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5795                                 break;
5796                         }
5797                         break;
5798                 case 184: /* CP Privileged reg access */
5799                         DRM_ERROR("Illegal register access in command stream\n");
5800                         /* XXX check the bitfield order! */
5801                         me_id = (ring_id & 0x60) >> 5;
5802                         pipe_id = (ring_id & 0x18) >> 3;
5803                         queue_id = (ring_id & 0x7) >> 0;
5804                         switch (me_id) {
5805                         case 0:
5806                                 /* This results in a full GPU reset, but all we need to do is soft
5807                                  * reset the CP for gfx
5808                                  */
5809                                 queue_reset = true;
5810                                 break;
5811                         case 1:
5812                                 /* XXX compute */
5813                                 queue_reset = true;
5814                                 break;
5815                         case 2:
5816                                 /* XXX compute */
5817                                 queue_reset = true;
5818                                 break;
5819                         }
5820                         break;
5821                 case 185: /* CP Privileged inst */
5822                         DRM_ERROR("Illegal instruction in command stream\n");
5823                         /* XXX check the bitfield order! */
5824                         me_id = (ring_id & 0x60) >> 5;
5825                         pipe_id = (ring_id & 0x18) >> 3;
5826                         queue_id = (ring_id & 0x7) >> 0;
5827                         switch (me_id) {
5828                         case 0:
5829                                 /* This results in a full GPU reset, but all we need to do is soft
5830                                  * reset the CP for gfx
5831                                  */
5832                                 queue_reset = true;
5833                                 break;
5834                         case 1:
5835                                 /* XXX compute */
5836                                 queue_reset = true;
5837                                 break;
5838                         case 2:
5839                                 /* XXX compute */
5840                                 queue_reset = true;
5841                                 break;
5842                         }
5843                         break;
5844                 case 224: /* SDMA trap event */
5845                         /* XXX check the bitfield order! */
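                        /* for SDMA sources, RINGID packs INSTANCE_ID in bits
                         * [1:0] and QUEUE_ID in bits [3:2] (see the IV ring
                         * layout above) */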
5846                         me_id = (ring_id & 0x3) >> 0;
5847                         queue_id = (ring_id & 0xc) >> 2;
5848                         DRM_DEBUG("IH: SDMA trap\n");
5849                         switch (me_id) {
5850                         case 0:
5851                                 switch (queue_id) {
5852                                 case 0:
5853                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5854                                         break;
5855                                 case 1:
5856                                         /* XXX compute */
5857                                         break;
5858                                 case 2:
5859                                         /* XXX compute */
5860                                         break;
5861                                 }
5862                                 break;
5863                         case 1:
5864                                 switch (queue_id) {
5865                                 case 0:
5866                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5867                                         break;
5868                                 case 1:
5869                                         /* XXX compute */
5870                                         break;
5871                                 case 2:
5872                                         /* XXX compute */
5873                                         break;
5874                                 }
5875                                 break;
5876                         }
5877                         break;
5878                 case 241: /* SDMA Privileged inst */
5879                 case 247: /* SDMA Privileged inst */
5880                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
5881                         /* XXX check the bitfield order! */
5882                         me_id = (ring_id & 0x3) >> 0;
5883                         queue_id = (ring_id & 0xc) >> 2;
5884                         switch (me_id) {
5885                         case 0:
5886                                 switch (queue_id) {
5887                                 case 0:
5888                                         queue_reset = true;
5889                                         break;
5890                                 case 1:
5891                                         /* XXX compute */
5892                                         queue_reset = true;
5893                                         break;
5894                                 case 2:
5895                                         /* XXX compute */
5896                                         queue_reset = true;
5897                                         break;
5898                                 }
5899                                 break;
5900                         case 1:
5901                                 switch (queue_id) {
5902                                 case 0:
5903                                         queue_reset = true;
5904                                         break;
5905                                 case 1:
5906                                         /* XXX compute */
5907                                         queue_reset = true;
5908                                         break;
5909                                 case 2:
5910                                         /* XXX compute */
5911                                         queue_reset = true;
5912                                         break;
5913                                 }
5914                                 break;
5915                         }
5916                         break;
5917                 case 233: /* GUI IDLE */
5918                         DRM_DEBUG("IH: GUI idle\n");
5919                         break;
5920                 default:
5921                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5922                         break;
5923                 }
5924
5925                 /* wptr/rptr are in bytes! */
5926                 rptr += 16;
5927                 rptr &= rdev->ih.ptr_mask;
5928         }
5929         if (queue_hotplug)
5930                 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
5931         if (queue_reset)
5932                 taskqueue_enqueue(rdev->tq, &rdev->reset_work);
5933         rdev->ih.rptr = rptr;
5934         WREG32(IH_RB_RPTR, rdev->ih.rptr);
5935         atomic_set(&rdev->ih.lock, 0);
5936
5937         /* make sure wptr hasn't changed while processing */
5938         wptr = cik_get_ih_wptr(rdev);
5939         if (wptr != rptr)
5940                 goto restart_ih;
5941
5942         return IRQ_HANDLED;
5943 }
5944
5945 /*
5946  * startup/shutdown callbacks
5947  */
5948 /**
5949  * cik_startup - program the asic to a functional state
5950  *
5951  * @rdev: radeon_device pointer
5952  *
5953  * Programs the asic to a functional state (CIK).
5954  * Called by cik_init() and cik_resume().
5955  * Returns 0 for success, error for failure.
5956  */
5957 static int cik_startup(struct radeon_device *rdev)
5958 {
5959         struct radeon_ring *ring;
5960         int r;
5961
5962         cik_mc_program(rdev);
5963
5964         if (rdev->flags & RADEON_IS_IGP) {
5965                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5966                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5967                         r = cik_init_microcode(rdev);
5968                         if (r) {
5969                                 DRM_ERROR("Failed to load firmware!\n");
5970                                 return r;
5971                         }
5972                 }
5973         } else {
5974                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5975                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5976                     !rdev->mc_fw) {
5977                         r = cik_init_microcode(rdev);
5978                         if (r) {
5979                                 DRM_ERROR("Failed to load firmware!\n");
5980                                 return r;
5981                         }
5982                 }
5983
5984                 r = ci_mc_load_microcode(rdev);
5985                 if (r) {
5986                         DRM_ERROR("Failed to load MC firmware!\n");
5987                         return r;
5988                 }
5989         }
5990
5991         r = r600_vram_scratch_init(rdev);
5992         if (r)
5993                 return r;
5994
5995         r = cik_pcie_gart_enable(rdev);
5996         if (r)
5997                 return r;
5998         cik_gpu_init(rdev);
5999
6000         /* allocate rlc buffers */
6001         r = si_rlc_init(rdev);
6002         if (r) {
6003                 DRM_ERROR("Failed to init rlc BOs!\n");
6004                 return r;
6005         }
6006
6007         /* allocate wb buffer */
6008         r = radeon_wb_init(rdev);
6009         if (r)
6010                 return r;
6011
6012         /* allocate mec buffers */
6013         r = cik_mec_init(rdev);
6014         if (r) {
6015                 DRM_ERROR("Failed to init MEC BOs!\n");
6016                 return r;
6017         }
6018
6019         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6020         if (r) {
6021                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6022                 return r;
6023         }
6024
6025         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6026         if (r) {
6027                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6028                 return r;
6029         }
6030
6031         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6032         if (r) {
6033                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6034                 return r;
6035         }
6036
6037         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6038         if (r) {
6039                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6040                 return r;
6041         }
6042
6043         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6044         if (r) {
6045                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6046                 return r;
6047         }
6048
6049         r = cik_uvd_resume(rdev);
6050         if (!r) {
6051                 r = radeon_fence_driver_start_ring(rdev,
6052                                                    R600_RING_TYPE_UVD_INDEX);
6053                 if (r)
6054                         dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6055         }
6056         if (r)
6057                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6058
6059         /* Enable IRQ */
6060         if (!rdev->irq.installed) {
6061                 r = radeon_irq_kms_init(rdev);
6062                 if (r)
6063                         return r;
6064         }
6065
6066         r = cik_irq_init(rdev);
6067         if (r) {
6068                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6069                 radeon_irq_kms_fini(rdev);
6070                 return r;
6071         }
6072         cik_irq_set(rdev);
6073
6074         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6075         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6076                              CP_RB0_RPTR, CP_RB0_WPTR,
6077                              0, 0xfffff, RADEON_CP_PACKET2);
6078         if (r)
6079                 return r;
6080
6081         /* set up the compute queues */
6082         /* type-2 packets are deprecated on MEC, use type-3 instead */
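        /* the nop used for padding here is PACKET3(PACKET3_NOP, 0x3FFF); a
         * count of 0x3FFF is the CP's special encoding for a single-dword
         * NOP, so the ring can be padded one dword at a time.
         */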
6083         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6084         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6085                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6086                              0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6087         if (r)
6088                 return r;
6089         ring->me = 1; /* first MEC */
6090         ring->pipe = 0; /* first pipe */
6091         ring->queue = 0; /* first queue */
6092         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6093
6094         /* type-2 packets are deprecated on MEC, use type-3 instead */
6095         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6096         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6097                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6098                              0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6099         if (r)
6100                 return r;
6101         /* dGPUs only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}

/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}

/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does little more than
 * call asic-specific functions.  This should also allow us to remove
 * a bunch of callback functions like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		si_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	si_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp;

	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920)
			tmp = 1;
		else if (mode->crtc_hdisplay < 2560)
			tmp = 2;
		else if (mode->crtc_hdisplay < 4096)
			tmp = 0;
		else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
		}
	} else
		tmp = 1;

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
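
/*
 * Worked example: a 1920-pixel-wide mode misses the "< 1920" bucket
 * and lands in "< 2560", so tmp = 2 and the function returns
 * 2560 * 2 = 5120 pixels of line buffer; a disabled controller
 * yields 0.
 */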

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;   /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
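
/*
 * These parameters are filled in from the active crtc mode and the
 * current memory/engine clocks by dce8_program_watermarks() below;
 * the dce8_*_bandwidth() helpers then consume them.
 */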

/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
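
/*
 * Worked example with illustrative numbers: wm->yclk = 1000000
 * (a 1 GHz effective rate per pin, in kHz) and 2 dram channels
 * give (2 * 4) * (1000000 / 1000) * 0.7 = 5600 MBytes/s.
 */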

/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}
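
/*
 * Note the asymmetry with dce8_dram_bandwidth() above: the raw
 * figure assumes 0.7 dram efficiency, while the share granted to
 * display conservatively assumes the worst-case 0.3 allocation.
 */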

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
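
/*
 * Worked example with illustrative numbers: a 1920-wide source at
 * 4 bytes/pixel, vsc = 1 and a 13200 ns line time (active + blank)
 * comes to 1920 * 4 / 13.2 us, roughly 580 MBytes/s.
 */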

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
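
/*
 * To summarize the model above: the watermark is the memory latency
 * plus the time the other heads may occupy the return path (chunk
 * and cursor traffic) plus the dc pipe latency; if filling the line
 * buffer would take longer than the active display time, the
 * shortfall is added on top.
 */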

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce8_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
}
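
/*
 * The watermark mask register selects which set the latency control
 * register banks onto: the read-modify-write sequence above points
 * it at set A, programs A, repoints it at set B, programs B, and
 * finally restores whichever set was selected originally.
 */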

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	spin_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	spin_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
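
/*
 * A minimal usage sketch: snapshot the counter around a workload
 * and subtract to obtain elapsed GPU clocks.
 *
 *	uint64_t start = cik_get_gpu_clock_counter(rdev);
 *	... submit work and wait for it ...
 *	uint64_t ticks = cik_get_gpu_clock_counter(rdev) - start;
 */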

static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
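
/*
 * The poll above tolerates up to 100 * 10 ms = 1 second for the new
 * divider to take effect before giving up with -ETIMEDOUT.
 */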

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}

int cik_uvd_resume(struct radeon_device *rdev)
{
	uint64_t addr;
	uint32_t size;
	int r;

	r = radeon_uvd_resume(rdev);
	if (r)
		return r;

	/* program the VCPU memory controller bits 0-27 */
	addr = rdev->uvd.gpu_addr >> 3;
	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
	WREG32(UVD_VCPU_CACHE_SIZE0, size);

	addr += size;
	size = RADEON_UVD_STACK_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
	WREG32(UVD_VCPU_CACHE_SIZE1, size);

	addr += size;
	size = RADEON_UVD_HEAP_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
	WREG32(UVD_VCPU_CACHE_SIZE2, size);

	/* bits 28-31 */
	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));

	/* bits 32-39 */
	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));

	return 0;
}
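
/*
 * Resulting VCPU address space, in the controller's 8-byte units
 * (each OFFSET/SIZE pair programmed above is a gpu_addr >> 3
 * quantity), packed back to back from uvd.gpu_addr:
 *
 *	CACHE0: the firmware image, page aligned
 *	CACHE1: the stack, RADEON_UVD_STACK_SIZE bytes
 *	CACHE2: the heap, RADEON_UVD_HEAP_SIZE bytes
 */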