drm/radeon: Sync to Linux 3.11
[dragonfly.git] / sys / dev / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/module.h>
25 #include <linux/firmware.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32
33 /* GFX */
34 #define CIK_PFP_UCODE_SIZE 2144
35 #define CIK_ME_UCODE_SIZE 2144
36 #define CIK_CE_UCODE_SIZE 2144
37 /* compute */
38 #define CIK_MEC_UCODE_SIZE 4192
39 /* interrupts */
40 #define BONAIRE_RLC_UCODE_SIZE 2048
41 #define KB_RLC_UCODE_SIZE 2560
42 #define KV_RLC_UCODE_SIZE 2560
43 /* gddr controller */
44 #define CIK_MC_UCODE_SIZE 7866
45 /* sdma */
46 #define CIK_SDMA_UCODE_SIZE 1050
47 #define CIK_SDMA_UCODE_VERSION 64
48
49 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
62 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
63 MODULE_FIRMWARE("radeon/KABINI_me.bin");
64 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
65 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
66 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
67 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
68
69 static void cik_rlc_stop(struct radeon_device *rdev);
70
71 /*
72  * Indirect registers accessor
73  */
74 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
75 {
76         u32 r;
77
78         WREG32(PCIE_INDEX, reg);
79         (void)RREG32(PCIE_INDEX);
80         r = RREG32(PCIE_DATA);
81         return r;
82 }
83
84 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
85 {
86         WREG32(PCIE_INDEX, reg);
87         (void)RREG32(PCIE_INDEX);
88         WREG32(PCIE_DATA, v);
89         (void)RREG32(PCIE_DATA);
90 }
91
92 static const u32 bonaire_golden_spm_registers[] =
93 {
94         0x30800, 0xe0ffffff, 0xe0000000
95 };
96
97 static const u32 bonaire_golden_common_registers[] =
98 {
99         0xc770, 0xffffffff, 0x00000800,
100         0xc774, 0xffffffff, 0x00000800,
101         0xc798, 0xffffffff, 0x00007fbf,
102         0xc79c, 0xffffffff, 0x00007faf
103 };
104
105 static const u32 bonaire_golden_registers[] =
106 {
107         0x3354, 0x00000333, 0x00000333,
108         0x3350, 0x000c0fc0, 0x00040200,
109         0x9a10, 0x00010000, 0x00058208,
110         0x3c000, 0xffff1fff, 0x00140000,
111         0x3c200, 0xfdfc0fff, 0x00000100,
112         0x3c234, 0x40000000, 0x40000200,
113         0x9830, 0xffffffff, 0x00000000,
114         0x9834, 0xf00fffff, 0x00000400,
115         0x9838, 0x0002021c, 0x00020200,
116         0xc78, 0x00000080, 0x00000000,
117         0x5bb0, 0x000000f0, 0x00000070,
118         0x5bc0, 0xf0311fff, 0x80300000,
119         0x98f8, 0x73773777, 0x12010001,
120         0x350c, 0x00810000, 0x408af000,
121         0x7030, 0x31000111, 0x00000011,
122         0x2f48, 0x73773777, 0x12010001,
123         0x220c, 0x00007fb6, 0x0021a1b1,
124         0x2210, 0x00007fb6, 0x002021b1,
125         0x2180, 0x00007fb6, 0x00002191,
126         0x2218, 0x00007fb6, 0x002121b1,
127         0x221c, 0x00007fb6, 0x002021b1,
128         0x21dc, 0x00007fb6, 0x00002191,
129         0x21e0, 0x00007fb6, 0x00002191,
130         0x3628, 0x0000003f, 0x0000000a,
131         0x362c, 0x0000003f, 0x0000000a,
132         0x2ae4, 0x00073ffe, 0x000022a2,
133         0x240c, 0x000007ff, 0x00000000,
134         0x8a14, 0xf000003f, 0x00000007,
135         0x8bf0, 0x00002001, 0x00000001,
136         0x8b24, 0xffffffff, 0x00ffffff,
137         0x30a04, 0x0000ff0f, 0x00000000,
138         0x28a4c, 0x07ffffff, 0x06000000,
139         0x4d8, 0x00000fff, 0x00000100,
140         0x3e78, 0x00000001, 0x00000002,
141         0x9100, 0x03000000, 0x0362c688,
142         0x8c00, 0x000000ff, 0x00000001,
143         0xe40, 0x00001fff, 0x00001fff,
144         0x9060, 0x0000007f, 0x00000020,
145         0x9508, 0x00010000, 0x00010000,
146         0xac14, 0x000003ff, 0x000000f3,
147         0xac0c, 0xffffffff, 0x00001032
148 };
149
150 static const u32 bonaire_mgcg_cgcg_init[] =
151 {
152         0xc420, 0xffffffff, 0xfffffffc,
153         0x30800, 0xffffffff, 0xe0000000,
154         0x3c2a0, 0xffffffff, 0x00000100,
155         0x3c208, 0xffffffff, 0x00000100,
156         0x3c2c0, 0xffffffff, 0xc0000100,
157         0x3c2c8, 0xffffffff, 0xc0000100,
158         0x3c2c4, 0xffffffff, 0xc0000100,
159         0x55e4, 0xffffffff, 0x00600100,
160         0x3c280, 0xffffffff, 0x00000100,
161         0x3c214, 0xffffffff, 0x06000100,
162         0x3c220, 0xffffffff, 0x00000100,
163         0x3c218, 0xffffffff, 0x06000100,
164         0x3c204, 0xffffffff, 0x00000100,
165         0x3c2e0, 0xffffffff, 0x00000100,
166         0x3c224, 0xffffffff, 0x00000100,
167         0x3c200, 0xffffffff, 0x00000100,
168         0x3c230, 0xffffffff, 0x00000100,
169         0x3c234, 0xffffffff, 0x00000100,
170         0x3c250, 0xffffffff, 0x00000100,
171         0x3c254, 0xffffffff, 0x00000100,
172         0x3c258, 0xffffffff, 0x00000100,
173         0x3c25c, 0xffffffff, 0x00000100,
174         0x3c260, 0xffffffff, 0x00000100,
175         0x3c27c, 0xffffffff, 0x00000100,
176         0x3c278, 0xffffffff, 0x00000100,
177         0x3c210, 0xffffffff, 0x06000100,
178         0x3c290, 0xffffffff, 0x00000100,
179         0x3c274, 0xffffffff, 0x00000100,
180         0x3c2b4, 0xffffffff, 0x00000100,
181         0x3c2b0, 0xffffffff, 0x00000100,
182         0x3c270, 0xffffffff, 0x00000100,
183         0x30800, 0xffffffff, 0xe0000000,
184         0x3c020, 0xffffffff, 0x00010000,
185         0x3c024, 0xffffffff, 0x00030002,
186         0x3c028, 0xffffffff, 0x00040007,
187         0x3c02c, 0xffffffff, 0x00060005,
188         0x3c030, 0xffffffff, 0x00090008,
189         0x3c034, 0xffffffff, 0x00010000,
190         0x3c038, 0xffffffff, 0x00030002,
191         0x3c03c, 0xffffffff, 0x00040007,
192         0x3c040, 0xffffffff, 0x00060005,
193         0x3c044, 0xffffffff, 0x00090008,
194         0x3c048, 0xffffffff, 0x00010000,
195         0x3c04c, 0xffffffff, 0x00030002,
196         0x3c050, 0xffffffff, 0x00040007,
197         0x3c054, 0xffffffff, 0x00060005,
198         0x3c058, 0xffffffff, 0x00090008,
199         0x3c05c, 0xffffffff, 0x00010000,
200         0x3c060, 0xffffffff, 0x00030002,
201         0x3c064, 0xffffffff, 0x00040007,
202         0x3c068, 0xffffffff, 0x00060005,
203         0x3c06c, 0xffffffff, 0x00090008,
204         0x3c070, 0xffffffff, 0x00010000,
205         0x3c074, 0xffffffff, 0x00030002,
206         0x3c078, 0xffffffff, 0x00040007,
207         0x3c07c, 0xffffffff, 0x00060005,
208         0x3c080, 0xffffffff, 0x00090008,
209         0x3c084, 0xffffffff, 0x00010000,
210         0x3c088, 0xffffffff, 0x00030002,
211         0x3c08c, 0xffffffff, 0x00040007,
212         0x3c090, 0xffffffff, 0x00060005,
213         0x3c094, 0xffffffff, 0x00090008,
214         0x3c098, 0xffffffff, 0x00010000,
215         0x3c09c, 0xffffffff, 0x00030002,
216         0x3c0a0, 0xffffffff, 0x00040007,
217         0x3c0a4, 0xffffffff, 0x00060005,
218         0x3c0a8, 0xffffffff, 0x00090008,
219         0x3c000, 0xffffffff, 0x96e00200,
220         0x8708, 0xffffffff, 0x00900100,
221         0xc424, 0xffffffff, 0x0020003f,
222         0x38, 0xffffffff, 0x0140001c,
223         0x3c, 0x000f0000, 0x000f0000,
224         0x220, 0xffffffff, 0xC060000C,
225         0x224, 0xc0000fff, 0x00000100,
226         0xf90, 0xffffffff, 0x00000100,
227         0xf98, 0x00000101, 0x00000000,
228         0x20a8, 0xffffffff, 0x00000104,
229         0x55e4, 0xff000fff, 0x00000100,
230         0x30cc, 0xc0000fff, 0x00000104,
231         0xc1e4, 0x00000001, 0x00000001,
232         0xd00c, 0xff000ff0, 0x00000100,
233         0xd80c, 0xff000ff0, 0x00000100
234 };
235
236 static const u32 spectre_golden_spm_registers[] =
237 {
238         0x30800, 0xe0ffffff, 0xe0000000
239 };
240
241 static const u32 spectre_golden_common_registers[] =
242 {
243         0xc770, 0xffffffff, 0x00000800,
244         0xc774, 0xffffffff, 0x00000800,
245         0xc798, 0xffffffff, 0x00007fbf,
246         0xc79c, 0xffffffff, 0x00007faf
247 };
248
249 static const u32 spectre_golden_registers[] =
250 {
251         0x3c000, 0xffff1fff, 0x96940200,
252         0x3c00c, 0xffff0001, 0xff000000,
253         0x3c200, 0xfffc0fff, 0x00000100,
254         0x6ed8, 0x00010101, 0x00010000,
255         0x9834, 0xf00fffff, 0x00000400,
256         0x9838, 0xfffffffc, 0x00020200,
257         0x5bb0, 0x000000f0, 0x00000070,
258         0x5bc0, 0xf0311fff, 0x80300000,
259         0x98f8, 0x73773777, 0x12010001,
260         0x9b7c, 0x00ff0000, 0x00fc0000,
261         0x2f48, 0x73773777, 0x12010001,
262         0x8a14, 0xf000003f, 0x00000007,
263         0x8b24, 0xffffffff, 0x00ffffff,
264         0x28350, 0x3f3f3fff, 0x00000082,
265         0x28355, 0x0000003f, 0x00000000,
266         0x3e78, 0x00000001, 0x00000002,
267         0x913c, 0xffff03df, 0x00000004,
268         0xc768, 0x00000008, 0x00000008,
269         0x8c00, 0x000008ff, 0x00000800,
270         0x9508, 0x00010000, 0x00010000,
271         0xac0c, 0xffffffff, 0x54763210,
272         0x214f8, 0x01ff01ff, 0x00000002,
273         0x21498, 0x007ff800, 0x00200000,
274         0x2015c, 0xffffffff, 0x00000f40,
275         0x30934, 0xffffffff, 0x00000001
276 };
277
278 static const u32 spectre_mgcg_cgcg_init[] =
279 {
280         0xc420, 0xffffffff, 0xfffffffc,
281         0x30800, 0xffffffff, 0xe0000000,
282         0x3c2a0, 0xffffffff, 0x00000100,
283         0x3c208, 0xffffffff, 0x00000100,
284         0x3c2c0, 0xffffffff, 0x00000100,
285         0x3c2c8, 0xffffffff, 0x00000100,
286         0x3c2c4, 0xffffffff, 0x00000100,
287         0x55e4, 0xffffffff, 0x00600100,
288         0x3c280, 0xffffffff, 0x00000100,
289         0x3c214, 0xffffffff, 0x06000100,
290         0x3c220, 0xffffffff, 0x00000100,
291         0x3c218, 0xffffffff, 0x06000100,
292         0x3c204, 0xffffffff, 0x00000100,
293         0x3c2e0, 0xffffffff, 0x00000100,
294         0x3c224, 0xffffffff, 0x00000100,
295         0x3c200, 0xffffffff, 0x00000100,
296         0x3c230, 0xffffffff, 0x00000100,
297         0x3c234, 0xffffffff, 0x00000100,
298         0x3c250, 0xffffffff, 0x00000100,
299         0x3c254, 0xffffffff, 0x00000100,
300         0x3c258, 0xffffffff, 0x00000100,
301         0x3c25c, 0xffffffff, 0x00000100,
302         0x3c260, 0xffffffff, 0x00000100,
303         0x3c27c, 0xffffffff, 0x00000100,
304         0x3c278, 0xffffffff, 0x00000100,
305         0x3c210, 0xffffffff, 0x06000100,
306         0x3c290, 0xffffffff, 0x00000100,
307         0x3c274, 0xffffffff, 0x00000100,
308         0x3c2b4, 0xffffffff, 0x00000100,
309         0x3c2b0, 0xffffffff, 0x00000100,
310         0x3c270, 0xffffffff, 0x00000100,
311         0x30800, 0xffffffff, 0xe0000000,
312         0x3c020, 0xffffffff, 0x00010000,
313         0x3c024, 0xffffffff, 0x00030002,
314         0x3c028, 0xffffffff, 0x00040007,
315         0x3c02c, 0xffffffff, 0x00060005,
316         0x3c030, 0xffffffff, 0x00090008,
317         0x3c034, 0xffffffff, 0x00010000,
318         0x3c038, 0xffffffff, 0x00030002,
319         0x3c03c, 0xffffffff, 0x00040007,
320         0x3c040, 0xffffffff, 0x00060005,
321         0x3c044, 0xffffffff, 0x00090008,
322         0x3c048, 0xffffffff, 0x00010000,
323         0x3c04c, 0xffffffff, 0x00030002,
324         0x3c050, 0xffffffff, 0x00040007,
325         0x3c054, 0xffffffff, 0x00060005,
326         0x3c058, 0xffffffff, 0x00090008,
327         0x3c05c, 0xffffffff, 0x00010000,
328         0x3c060, 0xffffffff, 0x00030002,
329         0x3c064, 0xffffffff, 0x00040007,
330         0x3c068, 0xffffffff, 0x00060005,
331         0x3c06c, 0xffffffff, 0x00090008,
332         0x3c070, 0xffffffff, 0x00010000,
333         0x3c074, 0xffffffff, 0x00030002,
334         0x3c078, 0xffffffff, 0x00040007,
335         0x3c07c, 0xffffffff, 0x00060005,
336         0x3c080, 0xffffffff, 0x00090008,
337         0x3c084, 0xffffffff, 0x00010000,
338         0x3c088, 0xffffffff, 0x00030002,
339         0x3c08c, 0xffffffff, 0x00040007,
340         0x3c090, 0xffffffff, 0x00060005,
341         0x3c094, 0xffffffff, 0x00090008,
342         0x3c098, 0xffffffff, 0x00010000,
343         0x3c09c, 0xffffffff, 0x00030002,
344         0x3c0a0, 0xffffffff, 0x00040007,
345         0x3c0a4, 0xffffffff, 0x00060005,
346         0x3c0a8, 0xffffffff, 0x00090008,
347         0x3c0ac, 0xffffffff, 0x00010000,
348         0x3c0b0, 0xffffffff, 0x00030002,
349         0x3c0b4, 0xffffffff, 0x00040007,
350         0x3c0b8, 0xffffffff, 0x00060005,
351         0x3c0bc, 0xffffffff, 0x00090008,
352         0x3c000, 0xffffffff, 0x96e00200,
353         0x8708, 0xffffffff, 0x00900100,
354         0xc424, 0xffffffff, 0x0020003f,
355         0x38, 0xffffffff, 0x0140001c,
356         0x3c, 0x000f0000, 0x000f0000,
357         0x220, 0xffffffff, 0xC060000C,
358         0x224, 0xc0000fff, 0x00000100,
359         0xf90, 0xffffffff, 0x00000100,
360         0xf98, 0x00000101, 0x00000000,
361         0x20a8, 0xffffffff, 0x00000104,
362         0x55e4, 0xff000fff, 0x00000100,
363         0x30cc, 0xc0000fff, 0x00000104,
364         0xc1e4, 0x00000001, 0x00000001,
365         0xd00c, 0xff000ff0, 0x00000100,
366         0xd80c, 0xff000ff0, 0x00000100
367 };
368
369 static const u32 kalindi_golden_spm_registers[] =
370 {
371         0x30800, 0xe0ffffff, 0xe0000000
372 };
373
374 static const u32 kalindi_golden_common_registers[] =
375 {
376         0xc770, 0xffffffff, 0x00000800,
377         0xc774, 0xffffffff, 0x00000800,
378         0xc798, 0xffffffff, 0x00007fbf,
379         0xc79c, 0xffffffff, 0x00007faf
380 };
381
382 static const u32 kalindi_golden_registers[] =
383 {
384         0x3c000, 0xffffdfff, 0x6e944040,
385         0x55e4, 0xff607fff, 0xfc000100,
386         0x3c220, 0xff000fff, 0x00000100,
387         0x3c224, 0xff000fff, 0x00000100,
388         0x3c200, 0xfffc0fff, 0x00000100,
389         0x6ed8, 0x00010101, 0x00010000,
390         0x9830, 0xffffffff, 0x00000000,
391         0x9834, 0xf00fffff, 0x00000400,
392         0x5bb0, 0x000000f0, 0x00000070,
393         0x5bc0, 0xf0311fff, 0x80300000,
394         0x98f8, 0x73773777, 0x12010001,
395         0x98fc, 0xffffffff, 0x00000010,
396         0x9b7c, 0x00ff0000, 0x00fc0000,
397         0x8030, 0x00001f0f, 0x0000100a,
398         0x2f48, 0x73773777, 0x12010001,
399         0x2408, 0x000fffff, 0x000c007f,
400         0x8a14, 0xf000003f, 0x00000007,
401         0x8b24, 0x3fff3fff, 0x00ffcfff,
402         0x30a04, 0x0000ff0f, 0x00000000,
403         0x28a4c, 0x07ffffff, 0x06000000,
404         0x4d8, 0x00000fff, 0x00000100,
405         0x3e78, 0x00000001, 0x00000002,
406         0xc768, 0x00000008, 0x00000008,
407         0x8c00, 0x000000ff, 0x00000003,
408         0x214f8, 0x01ff01ff, 0x00000002,
409         0x21498, 0x007ff800, 0x00200000,
410         0x2015c, 0xffffffff, 0x00000f40,
411         0x88c4, 0x001f3ae3, 0x00000082,
412         0x88d4, 0x0000001f, 0x00000010,
413         0x30934, 0xffffffff, 0x00000000
414 };
415
416 static const u32 kalindi_mgcg_cgcg_init[] =
417 {
418         0xc420, 0xffffffff, 0xfffffffc,
419         0x30800, 0xffffffff, 0xe0000000,
420         0x3c2a0, 0xffffffff, 0x00000100,
421         0x3c208, 0xffffffff, 0x00000100,
422         0x3c2c0, 0xffffffff, 0x00000100,
423         0x3c2c8, 0xffffffff, 0x00000100,
424         0x3c2c4, 0xffffffff, 0x00000100,
425         0x55e4, 0xffffffff, 0x00600100,
426         0x3c280, 0xffffffff, 0x00000100,
427         0x3c214, 0xffffffff, 0x06000100,
428         0x3c220, 0xffffffff, 0x00000100,
429         0x3c218, 0xffffffff, 0x06000100,
430         0x3c204, 0xffffffff, 0x00000100,
431         0x3c2e0, 0xffffffff, 0x00000100,
432         0x3c224, 0xffffffff, 0x00000100,
433         0x3c200, 0xffffffff, 0x00000100,
434         0x3c230, 0xffffffff, 0x00000100,
435         0x3c234, 0xffffffff, 0x00000100,
436         0x3c250, 0xffffffff, 0x00000100,
437         0x3c254, 0xffffffff, 0x00000100,
438         0x3c258, 0xffffffff, 0x00000100,
439         0x3c25c, 0xffffffff, 0x00000100,
440         0x3c260, 0xffffffff, 0x00000100,
441         0x3c27c, 0xffffffff, 0x00000100,
442         0x3c278, 0xffffffff, 0x00000100,
443         0x3c210, 0xffffffff, 0x06000100,
444         0x3c290, 0xffffffff, 0x00000100,
445         0x3c274, 0xffffffff, 0x00000100,
446         0x3c2b4, 0xffffffff, 0x00000100,
447         0x3c2b0, 0xffffffff, 0x00000100,
448         0x3c270, 0xffffffff, 0x00000100,
449         0x30800, 0xffffffff, 0xe0000000,
450         0x3c020, 0xffffffff, 0x00010000,
451         0x3c024, 0xffffffff, 0x00030002,
452         0x3c028, 0xffffffff, 0x00040007,
453         0x3c02c, 0xffffffff, 0x00060005,
454         0x3c030, 0xffffffff, 0x00090008,
455         0x3c034, 0xffffffff, 0x00010000,
456         0x3c038, 0xffffffff, 0x00030002,
457         0x3c03c, 0xffffffff, 0x00040007,
458         0x3c040, 0xffffffff, 0x00060005,
459         0x3c044, 0xffffffff, 0x00090008,
460         0x3c000, 0xffffffff, 0x96e00200,
461         0x8708, 0xffffffff, 0x00900100,
462         0xc424, 0xffffffff, 0x0020003f,
463         0x38, 0xffffffff, 0x0140001c,
464         0x3c, 0x000f0000, 0x000f0000,
465         0x220, 0xffffffff, 0xC060000C,
466         0x224, 0xc0000fff, 0x00000100,
467         0x20a8, 0xffffffff, 0x00000104,
468         0x55e4, 0xff000fff, 0x00000100,
469         0x30cc, 0xc0000fff, 0x00000104,
470         0xc1e4, 0x00000001, 0x00000001,
471         0xd00c, 0xff000ff0, 0x00000100,
472         0xd80c, 0xff000ff0, 0x00000100
473 };
474
475 static void cik_init_golden_registers(struct radeon_device *rdev)
476 {
477         switch (rdev->family) {
478         case CHIP_BONAIRE:
479                 radeon_program_register_sequence(rdev,
480                                                  bonaire_mgcg_cgcg_init,
481                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
482                 radeon_program_register_sequence(rdev,
483                                                  bonaire_golden_registers,
484                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
485                 radeon_program_register_sequence(rdev,
486                                                  bonaire_golden_common_registers,
487                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
488                 radeon_program_register_sequence(rdev,
489                                                  bonaire_golden_spm_registers,
490                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
491                 break;
492         case CHIP_KABINI:
493                 radeon_program_register_sequence(rdev,
494                                                  kalindi_mgcg_cgcg_init,
495                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
496                 radeon_program_register_sequence(rdev,
497                                                  kalindi_golden_registers,
498                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
499                 radeon_program_register_sequence(rdev,
500                                                  kalindi_golden_common_registers,
501                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
502                 radeon_program_register_sequence(rdev,
503                                                  kalindi_golden_spm_registers,
504                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
505                 break;
506         case CHIP_KAVERI:
507                 radeon_program_register_sequence(rdev,
508                                                  spectre_mgcg_cgcg_init,
509                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
510                 radeon_program_register_sequence(rdev,
511                                                  spectre_golden_registers,
512                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
513                 radeon_program_register_sequence(rdev,
514                                                  spectre_golden_common_registers,
515                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
516                 radeon_program_register_sequence(rdev,
517                                                  spectre_golden_spm_registers,
518                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
519                 break;
520         default:
521                 break;
522         }
523 }
524
525 /**
526  * cik_get_xclk - get the xclk
527  *
528  * @rdev: radeon_device pointer
529  *
530  * Returns the reference clock used by the gfx engine
531  * (CIK).
532  */
533 u32 cik_get_xclk(struct radeon_device *rdev)
534 {
535         u32 reference_clock = rdev->clock.spll.reference_freq;
536
537         if (rdev->flags & RADEON_IS_IGP) {
538                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
539                         return reference_clock / 2;
540         } else {
541                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
542                         return reference_clock / 4;
543         }
544         return reference_clock;
545 }
546
547 /**
548  * cik_mm_rdoorbell - read a doorbell dword
549  *
550  * @rdev: radeon_device pointer
551  * @offset: byte offset into the aperture
552  *
553  * Returns the value in the doorbell aperture at the
554  * requested offset (CIK).
555  */
556 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
557 {
558         if (offset < rdev->doorbell.size) {
559                 return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
560         } else {
561                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
562                 return 0;
563         }
564 }
565
566 /**
567  * cik_mm_wdoorbell - write a doorbell dword
568  *
569  * @rdev: radeon_device pointer
570  * @offset: byte offset into the aperture
571  * @v: value to write
572  *
573  * Writes @v to the doorbell aperture at the
574  * requested offset (CIK).
575  */
576 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
577 {
578         if (offset < rdev->doorbell.size) {
579                 writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
580         } else {
581                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
582         }
583 }
584
585 #define BONAIRE_IO_MC_REGS_SIZE 36
586
587 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
588 {
589         {0x00000070, 0x04400000},
590         {0x00000071, 0x80c01803},
591         {0x00000072, 0x00004004},
592         {0x00000073, 0x00000100},
593         {0x00000074, 0x00ff0000},
594         {0x00000075, 0x34000000},
595         {0x00000076, 0x08000014},
596         {0x00000077, 0x00cc08ec},
597         {0x00000078, 0x00000400},
598         {0x00000079, 0x00000000},
599         {0x0000007a, 0x04090000},
600         {0x0000007c, 0x00000000},
601         {0x0000007e, 0x4408a8e8},
602         {0x0000007f, 0x00000304},
603         {0x00000080, 0x00000000},
604         {0x00000082, 0x00000001},
605         {0x00000083, 0x00000002},
606         {0x00000084, 0xf3e4f400},
607         {0x00000085, 0x052024e3},
608         {0x00000087, 0x00000000},
609         {0x00000088, 0x01000000},
610         {0x0000008a, 0x1c0a0000},
611         {0x0000008b, 0xff010000},
612         {0x0000008d, 0xffffefff},
613         {0x0000008e, 0xfff3efff},
614         {0x0000008f, 0xfff3efbf},
615         {0x00000092, 0xf7ffffff},
616         {0x00000093, 0xffffff7f},
617         {0x00000095, 0x00101101},
618         {0x00000096, 0x00000fff},
619         {0x00000097, 0x00116fff},
620         {0x00000098, 0x60010000},
621         {0x00000099, 0x10010000},
622         {0x0000009a, 0x00006000},
623         {0x0000009b, 0x00001000},
624         {0x0000009f, 0x00b48000}
625 };
626
627 /**
628  * cik_srbm_select - select specific register instances
629  *
630  * @rdev: radeon_device pointer
631  * @me: selected ME (micro engine)
632  * @pipe: pipe
633  * @queue: queue
634  * @vmid: VMID
635  *
636  * Switches the currently active registers instances.  Some
637  * registers are instanced per VMID, others are instanced per
638  * me/pipe/queue combination.
639  */
640 static void cik_srbm_select(struct radeon_device *rdev,
641                             u32 me, u32 pipe, u32 queue, u32 vmid)
642 {
643         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
644                              MEID(me & 0x3) |
645                              VMID(vmid & 0xf) |
646                              QUEUEID(queue & 0x7));
647         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
648 }
649
650 /* ucode loading */
651 /**
652  * ci_mc_load_microcode - load MC ucode into the hw
653  *
654  * @rdev: radeon_device pointer
655  *
656  * Load the GDDR MC ucode into the hw (CIK).
657  * Returns 0 on success, error on failure.
658  */
659 static __unused int ci_mc_load_microcode(struct radeon_device *rdev)
660 {
661         const __be32 *fw_data;
662         u32 running, blackout = 0;
663         u32 *io_mc_regs;
664         int i, ucode_size, regs_size;
665
666         if (!rdev->mc_fw)
667                 return -EINVAL;
668
669         switch (rdev->family) {
670         case CHIP_BONAIRE:
671         default:
672                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
673                 ucode_size = CIK_MC_UCODE_SIZE;
674                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
675                 break;
676         }
677
678         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
679
680         if (running == 0) {
681                 if (running) {
682                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
683                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
684                 }
685
686                 /* reset the engine and set to writable */
687                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
688                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
689
690                 /* load mc io regs */
691                 for (i = 0; i < regs_size; i++) {
692                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
693                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
694                 }
695                 /* load the MC ucode */
696                 fw_data = (const __be32 *)rdev->mc_fw->data;
697                 for (i = 0; i < ucode_size; i++)
698                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
699
700                 /* put the engine back into the active state */
701                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
702                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
703                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
704
705                 /* wait for training to complete */
706                 for (i = 0; i < rdev->usec_timeout; i++) {
707                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
708                                 break;
709                         DRM_UDELAY(1);
710                 }
711                 for (i = 0; i < rdev->usec_timeout; i++) {
712                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
713                                 break;
714                         DRM_UDELAY(1);
715                 }
716
717                 if (running)
718                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
719         }
720
721         return 0;
722 }
723
724 /**
725  * cik_init_microcode - load ucode images from disk
726  *
727  * @rdev: radeon_device pointer
728  *
729  * Use the firmware interface to load the ucode images into
730  * the driver (not loaded into hw).
731  * Returns 0 on success, error on failure.
732  */
733 static int cik_init_microcode(struct radeon_device *rdev)
734 {
735         const char *chip_name;
736         size_t pfp_req_size, me_req_size, ce_req_size,
737                 mec_req_size, rlc_req_size, mc_req_size,
738                 sdma_req_size;
739         char fw_name[30];
740         int err;
741
742         DRM_DEBUG("\n");
743
744         switch (rdev->family) {
745         case CHIP_BONAIRE:
746                 chip_name = "BONAIRE";
747                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
748                 me_req_size = CIK_ME_UCODE_SIZE * 4;
749                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
750                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
751                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
752                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
753                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
754                 break;
755         case CHIP_KAVERI:
756                 chip_name = "KAVERI";
757                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
758                 me_req_size = CIK_ME_UCODE_SIZE * 4;
759                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
760                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
761                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
762                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
763                 break;
764         case CHIP_KABINI:
765                 chip_name = "KABINI";
766                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767                 me_req_size = CIK_ME_UCODE_SIZE * 4;
768                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
769                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
771                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772                 break;
773         default: BUG();
774         }
775
776         DRM_INFO("Loading %s Microcode\n", chip_name);
777         err = 0;
778
779         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
780         rdev->pfp_fw = firmware_get(fw_name);
781         if (rdev->pfp_fw == NULL) {
782                 err = -ENOENT;
783                 goto out;
784         }
785         if (rdev->pfp_fw->datasize != pfp_req_size) {
786                 printk(KERN_ERR
787                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
788                        rdev->pfp_fw->datasize, fw_name);
789                 err = -EINVAL;
790                 goto out;
791         }
792
793         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
794         rdev->me_fw = firmware_get(fw_name);
795         if (rdev->me_fw == NULL) {
796                 err = -ENOENT;
797                 goto out;
798         }
799         if (rdev->me_fw->datasize != me_req_size) {
800                 printk(KERN_ERR
801                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
802                        rdev->me_fw->datasize, fw_name);
803                 err = -EINVAL;
804         }
805
806         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
807         rdev->ce_fw = firmware_get(fw_name);
808         if (rdev->ce_fw == NULL) {
809                 err = -ENOENT;
810                 goto out;
811         }
812         if (rdev->ce_fw->datasize != ce_req_size) {
813                 printk(KERN_ERR
814                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
815                        rdev->ce_fw->datasize, fw_name);
816                 err = -EINVAL;
817         }
818
819         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
820         rdev->mec_fw = firmware_get(fw_name);
821         if (rdev->mec_fw == NULL) {
822                 err = -ENOENT;
823                 goto out;
824         }
825         if (rdev->mec_fw->datasize != mec_req_size) {
826                 printk(KERN_ERR
827                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
828                        rdev->mec_fw->datasize, fw_name);
829                 err = -EINVAL;
830         }
831
832         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
833         rdev->rlc_fw = firmware_get(fw_name);
834         if (rdev->rlc_fw == NULL) {
835                 err = -ENOENT;
836                 goto out;
837         }
838         if (rdev->rlc_fw->datasize != rlc_req_size) {
839                 printk(KERN_ERR
840                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
841                        rdev->rlc_fw->datasize, fw_name);
842                 err = -EINVAL;
843         }
844
845         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
846         rdev->sdma_fw = firmware_get(fw_name);
847         if (rdev->sdma_fw == NULL) {
848                 err = -ENOENT;
849                 goto out;
850         }
851         if (rdev->sdma_fw->datasize != sdma_req_size) {
852                 printk(KERN_ERR
853                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
854                        rdev->sdma_fw->datasize, fw_name);
855                 err = -EINVAL;
856         }
857
858         /* No MC ucode on APUs */
859         if (!(rdev->flags & RADEON_IS_IGP)) {
860                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
861                 rdev->mc_fw = firmware_get(fw_name);
862                 if (rdev->mc_fw == NULL) {
863                         err = -ENOENT;
864                         goto out;
865                 }
866                 if (rdev->mc_fw->datasize != mc_req_size) {
867                         printk(KERN_ERR
868                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
869                                rdev->mc_fw->datasize, fw_name);
870                         err = -EINVAL;
871                 }
872         }
873
874 out:
875         if (err) {
876                 if (err != -EINVAL)
877                         printk(KERN_ERR
878                                "cik_cp: Failed to load firmware \"%s\"\n",
879                                fw_name);
880                 if (rdev->pfp_fw != NULL) {
881                         firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
882                         rdev->pfp_fw = NULL;
883                 }
884                 if (rdev->me_fw != NULL) {
885                         firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
886                         rdev->me_fw = NULL;
887                 }
888                 if (rdev->ce_fw != NULL) {
889                         firmware_put(rdev->ce_fw, FIRMWARE_UNLOAD);
890                         rdev->ce_fw = NULL;
891                 }
892                 if (rdev->mec_fw != NULL) {
893                         firmware_put(rdev->mec_fw, FIRMWARE_UNLOAD);
894                         rdev->mec_fw = NULL;
895                 }
896                 if (rdev->rlc_fw != NULL) {
897                         firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
898                         rdev->rlc_fw = NULL;
899                 }
900                 if (rdev->sdma_fw != NULL) {
901                         firmware_put(rdev->sdma_fw, FIRMWARE_UNLOAD);
902                         rdev->sdma_fw = NULL;
903                 }
904                 if (rdev->mc_fw != NULL) {
905                         firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
906                         rdev->mc_fw = NULL;
907                 }
908         }
909         return err;
910 }
911
912 /*
913  * Core functions
914  */
915 /**
916  * cik_tiling_mode_table_init - init the hw tiling table
917  *
918  * @rdev: radeon_device pointer
919  *
920  * Starting with SI, the tiling setup is done globally in a
921  * set of 32 tiling modes.  Rather than selecting each set of
922  * parameters per surface as on older asics, we just select
923  * which index in the tiling table we want to use, and the
924  * surface uses those parameters (CIK).
925  */
926 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
927 {
928         const u32 num_tile_mode_states = 32;
929         const u32 num_secondary_tile_mode_states = 16;
930         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
931         u32 num_pipe_configs;
932         u32 num_rbs = rdev->config.cik.max_backends_per_se *
933                 rdev->config.cik.max_shader_engines;
934
935         switch (rdev->config.cik.mem_row_size_in_kb) {
936         case 1:
937                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
938                 break;
939         case 2:
940         default:
941                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
942                 break;
943         case 4:
944                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
945                 break;
946         }
947
948         num_pipe_configs = rdev->config.cik.max_tile_pipes;
949         if (num_pipe_configs > 8)
950                 num_pipe_configs = 8; /* ??? */
951
952         if (num_pipe_configs == 8) {
953                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
954                         switch (reg_offset) {
955                         case 0:
956                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
958                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
959                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
960                                 break;
961                         case 1:
962                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
963                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
964                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
965                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
966                                 break;
967                         case 2:
968                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
969                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
970                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
971                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
972                                 break;
973                         case 3:
974                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
975                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
976                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
977                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
978                                 break;
979                         case 4:
980                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
981                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
982                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
983                                                  TILE_SPLIT(split_equal_to_row_size));
984                                 break;
985                         case 5:
986                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
987                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
988                                 break;
989                         case 6:
990                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
991                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
992                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
993                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
994                                 break;
995                         case 7:
996                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
997                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
998                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
999                                                  TILE_SPLIT(split_equal_to_row_size));
1000                                 break;
1001                         case 8:
1002                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1003                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1004                                 break;
1005                         case 9:
1006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1008                                 break;
1009                         case 10:
1010                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1011                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1012                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1013                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1014                                 break;
1015                         case 11:
1016                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1017                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1018                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1019                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1020                                 break;
1021                         case 12:
1022                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1023                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1024                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1025                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1026                                 break;
1027                         case 13:
1028                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1029                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1030                                 break;
1031                         case 14:
1032                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1033                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1034                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1035                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1036                                 break;
1037                         case 16:
1038                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1039                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1040                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1041                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1042                                 break;
1043                         case 17:
1044                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1045                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1046                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1047                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1048                                 break;
1049                         case 27:
1050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1052                                 break;
1053                         case 28:
1054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1056                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1058                                 break;
1059                         case 29:
1060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1062                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1064                                 break;
1065                         case 30:
1066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1068                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1070                                 break;
1071                         default:
1072                                 gb_tile_moden = 0;
1073                                 break;
1074                         }
1075                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1076                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1077                 }
1078                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1079                         switch (reg_offset) {
1080                         case 0:
1081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1084                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1085                                 break;
1086                         case 1:
1087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1090                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1091                                 break;
1092                         case 2:
1093                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1096                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1097                                 break;
1098                         case 3:
1099                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1102                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1103                                 break;
1104                         case 4:
1105                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1108                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1109                                 break;
1110                         case 5:
1111                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1113                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1114                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1115                                 break;
1116                         case 6:
1117                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1120                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1121                                 break;
1122                         case 8:
1123                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1125                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1126                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1127                                 break;
1128                         case 9:
1129                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1132                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1133                                 break;
1134                         case 10:
1135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1139                                 break;
1140                         case 11:
1141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1144                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1145                                 break;
1146                         case 12:
1147                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1148                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1149                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1150                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1151                                 break;
1152                         case 13:
1153                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1154                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1155                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1156                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1157                                 break;
1158                         case 14:
1159                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1162                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1163                                 break;
1164                         default:
1165                                 gb_tile_moden = 0;
1166                                 break;
1167                         }
1168                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1169                 }
1170         } else if (num_pipe_configs == 4) {
1171                 if (num_rbs == 4) {
1172                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1173                                 switch (reg_offset) {
1174                                 case 0:
1175                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1176                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1177                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1179                                         break;
1180                                 case 1:
1181                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1182                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1183                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1184                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1185                                         break;
1186                                 case 2:
1187                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1188                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1189                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1190                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1191                                         break;
1192                                 case 3:
1193                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1194                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1195                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1196                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1197                                         break;
1198                                 case 4:
1199                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1200                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1201                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1202                                                          TILE_SPLIT(split_equal_to_row_size));
1203                                         break;
1204                                 case 5:
1205                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1206                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1207                                         break;
1208                                 case 6:
1209                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1210                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1211                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1212                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1213                                         break;
1214                                 case 7:
1215                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1216                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1217                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1218                                                          TILE_SPLIT(split_equal_to_row_size));
1219                                         break;
1220                                 case 8:
1221                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1222                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
1223                                         break;
1224                                 case 9:
1225                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1226                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1227                                         break;
1228                                 case 10:
1229                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1230                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1231                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1232                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1233                                         break;
1234                                 case 11:
1235                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1236                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1237                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1238                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1239                                         break;
1240                                 case 12:
1241                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1242                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1243                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1244                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1245                                         break;
1246                                 case 13:
1247                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1248                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1249                                         break;
1250                                 case 14:
1251                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1252                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1253                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1254                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1255                                         break;
1256                                 case 16:
1257                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1258                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1259                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1260                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1261                                         break;
1262                                 case 17:
1263                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1264                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1265                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1266                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1267                                         break;
1268                                 case 27:
1269                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1270                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1271                                         break;
1272                                 case 28:
1273                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1274                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1275                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1276                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1277                                         break;
1278                                 case 29:
1279                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1280                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1281                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1282                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1283                                         break;
1284                                 case 30:
1285                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1286                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1287                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1288                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1289                                         break;
1290                                 default:
1291                                         gb_tile_moden = 0;
1292                                         break;
1293                                 }
1294                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1295                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1296                         }
1297                 } else if (num_rbs < 4) {
1298                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1299                                 switch (reg_offset) {
1300                                 case 0:
1301                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1305                                         break;
1306                                 case 1:
1307                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1308                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1309                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1310                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1311                                         break;
1312                                 case 2:
1313                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1314                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1315                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1316                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1317                                         break;
1318                                 case 3:
1319                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1320                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1321                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1322                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1323                                         break;
1324                                 case 4:
1325                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1326                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1327                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1328                                                          TILE_SPLIT(split_equal_to_row_size));
1329                                         break;
1330                                 case 5:
1331                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1332                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1333                                         break;
1334                                 case 6:
1335                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1336                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1337                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1338                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1339                                         break;
1340                                 case 7:
1341                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1342                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1343                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1344                                                          TILE_SPLIT(split_equal_to_row_size));
1345                                         break;
1346                                 case 8:
1347                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1348                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
1349                                         break;
1350                                 case 9:
1351                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1352                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1353                                         break;
1354                                 case 10:
1355                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1356                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1357                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1358                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1359                                         break;
1360                                 case 11:
1361                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1362                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1363                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1364                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1365                                         break;
1366                                 case 12:
1367                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1368                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1369                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1370                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1371                                         break;
1372                                 case 13:
1373                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1374                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1375                                         break;
1376                                 case 14:
1377                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1378                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1379                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1380                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1381                                         break;
1382                                 case 16:
1383                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1384                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1385                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1386                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1387                                         break;
1388                                 case 17:
1389                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1390                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1391                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1392                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1393                                         break;
1394                                 case 27:
1395                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1396                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1397                                         break;
1398                                 case 28:
1399                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1400                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1401                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1402                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1403                                         break;
1404                                 case 29:
1405                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1406                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1407                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1408                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1409                                         break;
1410                                 case 30:
1411                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1412                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1413                                                         PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1414                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1415                                         break;
1416                                 default:
1417                                         gb_tile_moden = 0;
1418                                         break;
1419                                 }
1420                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1421                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1422                         }
1423                 }
1424                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1425                         switch (reg_offset) {
1426                                 case 0:
1427                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1429                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1430                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1431                                         break;
1432                                 case 1:
1433                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1435                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1436                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1437                                         break;
1438                                 case 2:
1439                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1442                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1443                                         break;
1444                                 case 3:
1445                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1446                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1447                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1448                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1449                                         break;
1450                                 case 4:
1451                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1452                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1453                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1454                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1455                                         break;
1456                                 case 5:
1457                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1459                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1460                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1461                                         break;
1462                                 case 6:
1463                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1465                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1466                                                         NUM_BANKS(ADDR_SURF_4_BANK));
1467                                         break;
1468                                 case 8:
1469                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1470                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1471                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1472                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1473                                         break;
1474                                 case 9:
1475                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1476                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1477                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1478                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1479                                         break;
1480                                 case 10:
1481                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1483                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1484                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1485                                         break;
1486                                 case 11:
1487                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1488                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1489                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1490                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1491                                         break;
1492                                 case 12:
1493                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1494                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1495                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1496                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1497                                         break;
1498                                 case 13:
1499                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1500                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1501                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1502                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1503                                         break;
1504                                 case 14:
1505                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1506                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1507                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1508                                                         NUM_BANKS(ADDR_SURF_4_BANK));
1509                                         break;
1510                                 default:
1511                                         gb_tile_moden = 0;
1512                                         break;
1513                         }
1514                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1515                 }
1516         } else if (num_pipe_configs == 2) {
1517                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1518                         switch (reg_offset) {
1519                                 case 0:
1520                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1523                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1524                                         break;
1525                                 case 1:
1526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1527                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1528                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1529                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1530                                         break;
1531                                 case 2:
1532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1533                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1534                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1535                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1536                                         break;
1537                                 case 3:
1538                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1539                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1540                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1541                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1542                                         break;
1543                                 case 4:
1544                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1545                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1546                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1547                                                         TILE_SPLIT(split_equal_to_row_size));
1548                                         break;
1549                                 case 5:
1550                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1551                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1552                                         break;
1553                                 case 6:
1554                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1555                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1556                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1557                                                         TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1558                                         break;
1559                                 case 7:
1560                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1561                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1562                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1563                                                         TILE_SPLIT(split_equal_to_row_size));
1564                                         break;
1565                                 case 8:
1566                                         gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1567                                         break;
1568                                 case 9:
1569                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1570                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1571                                         break;
1572                                 case 10:
1573                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1574                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1575                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1576                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1577                                         break;
1578                                 case 11:
1579                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1580                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1581                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1582                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1583                                         break;
1584                                 case 12:
1585                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1586                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1587                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1588                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1589                                         break;
1590                                 case 13:
1591                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1592                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1593                                         break;
1594                                 case 14:
1595                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1596                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1597                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1598                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1599                                         break;
1600                                 case 16:
1601                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1602                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1603                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1604                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1605                                         break;
1606                                 case 17:
1607                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1608                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1609                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1610                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1611                                         break;
1612                                 case 27:
1613                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1614                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1615                                         break;
1616                                 case 28:
1617                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1618                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1619                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1620                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1621                                         break;
1622                                 case 29:
1623                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1624                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1625                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1626                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1627                                         break;
1628                                 case 30:
1629                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1630                                                         MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1631                                                         PIPE_CONFIG(ADDR_SURF_P2) |
1632                                                         SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1633                                         break;
1634                                 default:
1635                                         gb_tile_moden = 0;
1636                                         break;
1637                         }
1638                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1639                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1640                 }
1641                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1642                         switch (reg_offset) {
1643                                 case 0:
1644                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1645                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1646                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1648                                         break;
1649                                 case 1:
1650                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1651                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1652                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1654                                         break;
1655                                 case 2:
1656                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1658                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1659                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1660                                         break;
1661                                 case 3:
1662                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1663                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1664                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1666                                         break;
1667                                 case 4:
1668                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1669                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1670                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1672                                         break;
1673                                 case 5:
1674                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1675                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1676                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1678                                         break;
1679                                 case 6:
1680                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1681                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1682                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1683                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1684                                         break;
1685                                 case 8:
1686                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1687                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1688                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1690                                         break;
1691                                 case 9:
1692                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1693                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1694                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1696                                         break;
1697                                 case 10:
1698                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1699                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1700                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1701                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1702                                         break;
1703                                 case 11:
1704                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1705                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1706                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1707                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1708                                         break;
1709                                 case 12:
1710                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1711                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1712                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1713                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1714                                         break;
1715                                 case 13:
1716                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1717                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1718                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1719                                                         NUM_BANKS(ADDR_SURF_16_BANK));
1720                                         break;
1721                                 case 14:
1722                                         gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1723                                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1724                                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1725                                                         NUM_BANKS(ADDR_SURF_8_BANK));
1726                                         break;
1727                                 default:
1728                                         gb_tile_moden = 0;
1729                                         break;
1730                         }
1731                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1732                 }
1733         } else
1734                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1735 }
1736
1737 /**
1738  * cik_select_se_sh - select which SE, SH to address
1739  *
1740  * @rdev: radeon_device pointer
1741  * @se_num: shader engine to address
1742  * @sh_num: sh block to address
1743  *
1744  * Select which SE, SH combinations to address. Certain
1745  * registers are instanced per SE or SH.  0xffffffff means
1746  * broadcast to all SEs or SHs (CIK).
1747  */
1748 static void cik_select_se_sh(struct radeon_device *rdev,
1749                 u32 se_num, u32 sh_num)
1750 {
1751         u32 data = INSTANCE_BROADCAST_WRITES;
1752
1753         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1754                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1755         else if (se_num == 0xffffffff)
1756                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1757         else if (sh_num == 0xffffffff)
1758                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1759         else
1760                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1761         WREG32(GRBM_GFX_INDEX, data);
1762 }
1763
1764 /**
1765  * cik_create_bitmask - create a bitmask
1766  *
1767  * @bit_width: length of the mask
1768  *
1769  * create a variable length bit mask (CIK).
1770  * Returns the bitmask.
1771  */
1772 static u32 cik_create_bitmask(u32 bit_width)
1773 {
1774         u32 i, mask = 0;
1775
1776         for (i = 0; i < bit_width; i++) {
1777                 mask <<= 1;
1778                 mask |= 1;
1779         }
1780         return mask;
1781 }
1782
1783 /**
1784  * cik_select_se_sh - select which SE, SH to address
1785  *
1786  * @rdev: radeon_device pointer
1787  * @max_rb_num: max RBs (render backends) for the asic
1788  * @se_num: number of SEs (shader engines) for the asic
1789  * @sh_per_se: number of SH blocks per SE for the asic
1790  *
1791  * Calculates the bitmask of disabled RBs (CIK).
1792  * Returns the disabled RB bitmask.
1793  */
1794 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1795                 u32 max_rb_num, u32 se_num,
1796                 u32 sh_per_se)
1797 {
1798         u32 data, mask;
1799
1800         data = RREG32(CC_RB_BACKEND_DISABLE);
1801         if (data & 1)
1802                 data &= BACKEND_DISABLE_MASK;
1803         else
1804                 data = 0;
1805         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1806
1807         data >>= BACKEND_DISABLE_SHIFT;
1808
1809         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1810
1811         return data & mask;
1812 }
1813
1814 /**
1815  * cik_setup_rb - setup the RBs on the asic
1816  *
1817  * @rdev: radeon_device pointer
1818  * @se_num: number of SEs (shader engines) for the asic
1819  * @sh_per_se: number of SH blocks per SE for the asic
1820  * @max_rb_num: max RBs (render backends) for the asic
1821  *
1822  * Configures per-SE/SH RB registers (CIK).
1823  */
1824 static void cik_setup_rb(struct radeon_device *rdev,
1825                 u32 se_num, u32 sh_per_se,
1826                 u32 max_rb_num)
1827 {
1828         int i, j;
1829         u32 data, mask;
1830         u32 disabled_rbs = 0;
1831         u32 enabled_rbs = 0;
1832
1833         for (i = 0; i < se_num; i++) {
1834                 for (j = 0; j < sh_per_se; j++) {
1835                         cik_select_se_sh(rdev, i, j);
1836                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1837                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1838                 }
1839         }
1840         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1841
1842         mask = 1;
1843         for (i = 0; i < max_rb_num; i++) {
1844                 if (!(disabled_rbs & mask))
1845                         enabled_rbs |= mask;
1846                 mask <<= 1;
1847         }
1848
1849         for (i = 0; i < se_num; i++) {
1850                 cik_select_se_sh(rdev, i, 0xffffffff);
1851                 data = 0;
1852                 for (j = 0; j < sh_per_se; j++) {
1853                         switch (enabled_rbs & 3) {
1854                                 case 1:
1855                                         data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1856                                         break;
1857                                 case 2:
1858                                         data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1859                                         break;
1860                                 case 3:
1861                                 default:
1862                                         data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1863                                         break;
1864                         }
1865                         enabled_rbs >>= 2;
1866                 }
1867                 WREG32(PA_SC_RASTER_CONFIG, data);
1868         }
1869         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1870 }
1871
1872 /**
1873  * cik_gpu_init - setup the 3D engine
1874  *
1875  * @rdev: radeon_device pointer
1876  *
1877  * Configures the 3D engine and tiling configuration
1878  * registers so that the 3D engine is usable.
1879  */
1880 static __unused void cik_gpu_init(struct radeon_device *rdev)
1881 {
1882         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1883         u32 mc_shared_chmap, mc_arb_ramcfg;
1884         u32 hdp_host_path_cntl;
1885         u32 tmp;
1886         int i, j;
1887
1888         switch (rdev->family) {
1889                 case CHIP_BONAIRE:
1890                         rdev->config.cik.max_shader_engines = 2;
1891                         rdev->config.cik.max_tile_pipes = 4;
1892                         rdev->config.cik.max_cu_per_sh = 7;
1893                         rdev->config.cik.max_sh_per_se = 1;
1894                         rdev->config.cik.max_backends_per_se = 2;
1895                         rdev->config.cik.max_texture_channel_caches = 4;
1896                         rdev->config.cik.max_gprs = 256;
1897                         rdev->config.cik.max_gs_threads = 32;
1898                         rdev->config.cik.max_hw_contexts = 8;
1899
1900                         rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1901                         rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1902                         rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1903                         rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1904                         gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1905                         break;
1906                 case CHIP_KAVERI:
1907                         /* TODO */
1908                         break;
1909                 case CHIP_KABINI:
1910                 default:
1911                         rdev->config.cik.max_shader_engines = 1;
1912                         rdev->config.cik.max_tile_pipes = 2;
1913                         rdev->config.cik.max_cu_per_sh = 2;
1914                         rdev->config.cik.max_sh_per_se = 1;
1915                         rdev->config.cik.max_backends_per_se = 1;
1916                         rdev->config.cik.max_texture_channel_caches = 2;
1917                         rdev->config.cik.max_gprs = 256;
1918                         rdev->config.cik.max_gs_threads = 16;
1919                         rdev->config.cik.max_hw_contexts = 8;
1920
1921                         rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1922                         rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1923                         rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1924                         rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1925                         gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1926                         break;
1927         }
1928
1929         /* Initialize HDP */
1930         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1931                 WREG32((0x2c14 + j), 0x00000000);
1932                 WREG32((0x2c18 + j), 0x00000000);
1933                 WREG32((0x2c1c + j), 0x00000000);
1934                 WREG32((0x2c20 + j), 0x00000000);
1935                 WREG32((0x2c24 + j), 0x00000000);
1936         }
1937
1938         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1939
1940         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1941
1942         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1943         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1944
1945         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1946         rdev->config.cik.mem_max_burst_length_bytes = 256;
1947         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1948         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1949         if (rdev->config.cik.mem_row_size_in_kb > 4)
1950                 rdev->config.cik.mem_row_size_in_kb = 4;
1951         /* XXX use MC settings? */
1952         rdev->config.cik.shader_engine_tile_size = 32;
1953         rdev->config.cik.num_gpus = 1;
1954         rdev->config.cik.multi_gpu_tile_size = 64;
1955
1956         /* fix up row size */
1957         gb_addr_config &= ~ROW_SIZE_MASK;
1958         switch (rdev->config.cik.mem_row_size_in_kb) {
1959                 case 1:
1960                 default:
1961                         gb_addr_config |= ROW_SIZE(0);
1962                         break;
1963                 case 2:
1964                         gb_addr_config |= ROW_SIZE(1);
1965                         break;
1966                 case 4:
1967                         gb_addr_config |= ROW_SIZE(2);
1968                         break;
1969         }
1970
1971         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1972          * not have bank info, so create a custom tiling dword.
1973          * bits 3:0   num_pipes
1974          * bits 7:4   num_banks
1975          * bits 11:8  group_size
1976          * bits 15:12 row_size
1977          */
1978         rdev->config.cik.tile_config = 0;
1979         switch (rdev->config.cik.num_tile_pipes) {
1980                 case 1:
1981                         rdev->config.cik.tile_config |= (0 << 0);
1982                         break;
1983                 case 2:
1984                         rdev->config.cik.tile_config |= (1 << 0);
1985                         break;
1986                 case 4:
1987                         rdev->config.cik.tile_config |= (2 << 0);
1988                         break;
1989                 case 8:
1990                 default:
1991                         /* XXX what about 12? */
1992                         rdev->config.cik.tile_config |= (3 << 0);
1993                         break;
1994         }
1995         if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1996                 rdev->config.cik.tile_config |= 1 << 4;
1997         else
1998                 rdev->config.cik.tile_config |= 0 << 4;
1999         rdev->config.cik.tile_config |=
2000                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2001         rdev->config.cik.tile_config |=
2002                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2003
2004         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2005         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2006         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2007         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2008         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2009         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2010         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2011         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2012
2013         cik_tiling_mode_table_init(rdev);
2014
2015         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2016                         rdev->config.cik.max_sh_per_se,
2017                         rdev->config.cik.max_backends_per_se);
2018
2019         /* set HW defaults for 3D engine */
2020         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2021
2022         WREG32(SX_DEBUG_1, 0x20);
2023
2024         WREG32(TA_CNTL_AUX, 0x00010000);
2025
2026         tmp = RREG32(SPI_CONFIG_CNTL);
2027         tmp |= 0x03000000;
2028         WREG32(SPI_CONFIG_CNTL, tmp);
2029
2030         WREG32(SQ_CONFIG, 1);
2031
2032         WREG32(DB_DEBUG, 0);
2033
2034         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2035         tmp |= 0x00000400;
2036         WREG32(DB_DEBUG2, tmp);
2037
2038         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2039         tmp |= 0x00020200;
2040         WREG32(DB_DEBUG3, tmp);
2041
2042         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2043         tmp |= 0x00018208;
2044         WREG32(CB_HW_CONTROL, tmp);
2045
2046         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2047
2048         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2049                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2050                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2051                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2052
2053         WREG32(VGT_NUM_INSTANCES, 1);
2054
2055         WREG32(CP_PERFMON_CNTL, 0);
2056
2057         WREG32(SQ_CONFIG, 0);
2058
2059         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2060                                 FORCE_EOV_MAX_REZ_CNT(255)));
2061
2062         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2063                         AUTO_INVLD_EN(ES_AND_GS_AUTO));
2064
2065         WREG32(VGT_GS_VERTEX_REUSE, 16);
2066         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2067
2068         tmp = RREG32(HDP_MISC_CNTL);
2069         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2070         WREG32(HDP_MISC_CNTL, tmp);
2071
2072         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2073         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2074
2075         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2076         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2077
2078         DRM_UDELAY(50);
2079 }
2080
/*
 * GPU scratch register helper functions.
 */
2084 /**
2085  * cik_scratch_init - setup driver info for CP scratch regs
2086  *
2087  * @rdev: radeon_device pointer
2088  *
2089  * Set up the number and offset of the CP scratch registers.
2090  * NOTE: use of CP scratch registers is a legacy inferface and
2091  * is not used by default on newer asics (r6xx+).  On newer asics,
2092  * memory buffers are used for fences rather than scratch regs.
2093  */
2094 static __unused void cik_scratch_init(struct radeon_device *rdev)
2095 {
2096         int i;
2097
2098         rdev->scratch.num_reg = 7;
2099         rdev->scratch.reg_base = SCRATCH_REG0;
2100         for (i = 0; i < rdev->scratch.num_reg; i++) {
2101                 rdev->scratch.free[i] = true;
2102                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2103         }
2104 }
2105
2106 /**
2107  * cik_ring_test - basic gfx ring test
2108  *
2109  * @rdev: radeon_device pointer
2110  * @ring: radeon_ring structure holding ring information
2111  *
2112  * Allocate a scratch register and write to it using the gfx ring (CIK).
2113  * Provides a basic gfx ring test to verify that the ring is working.
2114  * Used by cik_cp_gfx_resume();
2115  * Returns 0 on success, error on failure.
2116  */
2117 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2118 {
2119         uint32_t scratch;
2120         uint32_t tmp = 0;
2121         unsigned i;
2122         int r;
2123
2124         r = radeon_scratch_get(rdev, &scratch);
2125         if (r) {
2126                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2127                 return r;
2128         }
2129         WREG32(scratch, 0xCAFEDEAD);
2130         r = radeon_ring_lock(rdev, ring, 3);
2131         if (r) {
2132                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2133                 radeon_scratch_free(rdev, scratch);
2134                 return r;
2135         }
2136         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2137         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2138         radeon_ring_write(ring, 0xDEADBEEF);
2139         radeon_ring_unlock_commit(rdev, ring);
2140
2141         for (i = 0; i < rdev->usec_timeout; i++) {
2142                 tmp = RREG32(scratch);
2143                 if (tmp == 0xDEADBEEF)
2144                         break;
2145                 DRM_UDELAY(1);
2146         }
2147         if (i < rdev->usec_timeout) {
2148                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2149         } else {
2150                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2151                           ring->idx, scratch, tmp);
2152                 r = -EINVAL;
2153         }
2154         radeon_scratch_free(rdev, scratch);
2155         return r;
2156 }
2157
2158 /**
2159  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2160  *
2161  * @rdev: radeon_device pointer
2162  * @fence: radeon fence object
2163  *
2164  * Emits a fence sequnce number on the gfx ring and flushes
2165  * GPU caches.
2166  */
2167 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2168                              struct radeon_fence *fence)
2169 {
2170         struct radeon_ring *ring = &rdev->ring[fence->ring];
2171         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2172
2173         /* EVENT_WRITE_EOP - flush caches, send int */
2174         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2175         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2176                                  EOP_TC_ACTION_EN |
2177                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2178                                  EVENT_INDEX(5)));
2179         radeon_ring_write(ring, addr & 0xfffffffc);
2180         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2181         radeon_ring_write(ring, fence->seq);
2182         radeon_ring_write(ring, 0);
2183         /* HDP flush */
2184         /* We should be using the new WAIT_REG_MEM special op packet here
2185          * but it causes the CP to hang
2186          */
2187         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2188         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2189                                  WRITE_DATA_DST_SEL(0)));
2190         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2191         radeon_ring_write(ring, 0);
2192         radeon_ring_write(ring, 0);
2193 }
2194
2195 /**
2196  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2197  *
2198  * @rdev: radeon_device pointer
2199  * @fence: radeon fence object
2200  *
2201  * Emits a fence sequnce number on the compute ring and flushes
2202  * GPU caches.
2203  */
2204 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2205                                  struct radeon_fence *fence)
2206 {
2207         struct radeon_ring *ring = &rdev->ring[fence->ring];
2208         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2209
2210         /* RELEASE_MEM - flush caches, send int */
2211         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2212         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2213                                  EOP_TC_ACTION_EN |
2214                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2215                                  EVENT_INDEX(5)));
2216         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2217         radeon_ring_write(ring, addr & 0xfffffffc);
2218         radeon_ring_write(ring, upper_32_bits(addr));
2219         radeon_ring_write(ring, fence->seq);
2220         radeon_ring_write(ring, 0);
2221         /* HDP flush */
2222         /* We should be using the new WAIT_REG_MEM special op packet here
2223          * but it causes the CP to hang
2224          */
2225         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2226         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2227                                  WRITE_DATA_DST_SEL(0)));
2228         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2229         radeon_ring_write(ring, 0);
2230         radeon_ring_write(ring, 0);
2231 }
2232
2233 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2234                              struct radeon_ring *ring,
2235                              struct radeon_semaphore *semaphore,
2236                              bool emit_wait)
2237 {
2238         uint64_t addr = semaphore->gpu_addr;
2239         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2240
2241         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2242         radeon_ring_write(ring, addr & 0xffffffff);
2243         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2244 }
2245
2246 /*
2247  * IB stuff
2248  */
2249 /**
2250  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2251  *
2252  * @rdev: radeon_device pointer
2253  * @ib: radeon indirect buffer object
2254  *
2255  * Emits an DE (drawing engine) or CE (constant engine) IB
2256  * on the gfx ring.  IBs are usually generated by userspace
2257  * acceleration drivers and submitted to the kernel for
2258  * sheduling on the ring.  This function schedules the IB
2259  * on the gfx ring for execution by the GPU.
2260  */
2261 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2262 {
2263         struct radeon_ring *ring = &rdev->ring[ib->ring];
2264         u32 header, control = INDIRECT_BUFFER_VALID;
2265
2266         if (ib->is_const_ib) {
2267                 /* set switch buffer packet before const IB */
2268                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2269                 radeon_ring_write(ring, 0);
2270
2271                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2272         } else {
2273                 u32 next_rptr;
2274                 if (ring->rptr_save_reg) {
2275                         next_rptr = ring->wptr + 3 + 4;
2276                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2277                         radeon_ring_write(ring, ((ring->rptr_save_reg -
2278                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
2279                         radeon_ring_write(ring, next_rptr);
2280                 } else if (rdev->wb.enabled) {
2281                         next_rptr = ring->wptr + 5 + 4;
2282                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2283                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2284                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2285                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2286                         radeon_ring_write(ring, next_rptr);
2287                 }
2288
2289                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2290         }
2291
2292         control |= ib->length_dw |
2293                 (ib->vm ? (ib->vm->id << 24) : 0);
2294
2295         radeon_ring_write(ring, header);
2296         radeon_ring_write(ring,
2297 #ifdef __BIG_ENDIAN
2298                           (2 << 0) |
2299 #endif
2300                           (ib->gpu_addr & 0xFFFFFFFC));
2301         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2302         radeon_ring_write(ring, control);
2303 }
2304
2305 /**
2306  * cik_ib_test - basic gfx ring IB test
2307  *
2308  * @rdev: radeon_device pointer
2309  * @ring: radeon_ring structure holding ring information
2310  *
2311  * Allocate an IB and execute it on the gfx ring (CIK).
2312  * Provides a basic gfx ring test to verify that IBs are working.
2313  * Returns 0 on success, error on failure.
2314  */
2315 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2316 {
2317         struct radeon_ib ib;
2318         uint32_t scratch;
2319         uint32_t tmp = 0;
2320         unsigned i;
2321         int r;
2322
2323         r = radeon_scratch_get(rdev, &scratch);
2324         if (r) {
2325                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2326                 return r;
2327         }
2328         WREG32(scratch, 0xCAFEDEAD);
2329         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2330         if (r) {
2331                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2332                 return r;
2333         }
2334         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2335         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2336         ib.ptr[2] = 0xDEADBEEF;
2337         ib.length_dw = 3;
2338         r = radeon_ib_schedule(rdev, &ib, NULL);
2339         if (r) {
2340                 radeon_scratch_free(rdev, scratch);
2341                 radeon_ib_free(rdev, &ib);
2342                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2343                 return r;
2344         }
2345         r = radeon_fence_wait(ib.fence, false);
2346         if (r) {
2347                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2348                 return r;
2349         }
2350         for (i = 0; i < rdev->usec_timeout; i++) {
2351                 tmp = RREG32(scratch);
2352                 if (tmp == 0xDEADBEEF)
2353                         break;
2354                 DRM_UDELAY(1);
2355         }
2356         if (i < rdev->usec_timeout) {
2357                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2358         } else {
2359                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2360                           scratch, tmp);
2361                 r = -EINVAL;
2362         }
2363         radeon_scratch_free(rdev, scratch);
2364         radeon_ib_free(rdev, &ib);
2365         return r;
2366 }
2367
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
2391 /**
2392  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2393  *
2394  * @rdev: radeon_device pointer
2395  * @enable: enable or disable the MEs
2396  *
2397  * Halts or unhalts the gfx MEs.
2398  */
2399 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2400 {
2401         if (enable)
2402                 WREG32(CP_ME_CNTL, 0);
2403         else {
2404                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2405                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2406         }
2407         DRM_UDELAY(50);
2408 }
2409
2410 /**
2411  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2412  *
2413  * @rdev: radeon_device pointer
2414  *
2415  * Loads the gfx PFP, ME, and CE ucode.
2416  * Returns 0 for success, -EINVAL if the ucode is not available.
2417  */
2418 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2419 {
2420         const __be32 *fw_data;
2421         int i;
2422
2423         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2424                 return -EINVAL;
2425
2426         cik_cp_gfx_enable(rdev, false);
2427
2428         /* PFP */
2429         fw_data = (const __be32 *)rdev->pfp_fw->data;
2430         WREG32(CP_PFP_UCODE_ADDR, 0);
2431         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2432                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2433         WREG32(CP_PFP_UCODE_ADDR, 0);
2434
2435         /* CE */
2436         fw_data = (const __be32 *)rdev->ce_fw->data;
2437         WREG32(CP_CE_UCODE_ADDR, 0);
2438         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2439                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2440         WREG32(CP_CE_UCODE_ADDR, 0);
2441
2442         /* ME */
2443         fw_data = (const __be32 *)rdev->me_fw->data;
2444         WREG32(CP_ME_RAM_WADDR, 0);
2445         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2446                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2447         WREG32(CP_ME_RAM_WADDR, 0);
2448
2449         WREG32(CP_PFP_UCODE_ADDR, 0);
2450         WREG32(CP_CE_UCODE_ADDR, 0);
2451         WREG32(CP_ME_RAM_WADDR, 0);
2452         WREG32(CP_ME_RAM_RADDR, 0);
2453         return 0;
2454 }
2455
2456 /**
2457  * cik_cp_gfx_start - start the gfx ring
2458  *
2459  * @rdev: radeon_device pointer
2460  *
2461  * Enables the ring and loads the clear state context and other
2462  * packets required to init the ring.
2463  * Returns 0 for success, error for failure.
2464  */
2465 static int cik_cp_gfx_start(struct radeon_device *rdev)
2466 {
2467         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2468         int r, i;
2469
2470         /* init the CP */
2471         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2472         WREG32(CP_ENDIAN_SWAP, 0);
2473         WREG32(CP_DEVICE_ID, 1);
2474
2475         cik_cp_gfx_enable(rdev, true);
2476
2477         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2478         if (r) {
2479                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2480                 return r;
2481         }
2482
2483         /* init the CE partitions.  CE only used for gfx on CIK */
2484         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2485         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2486         radeon_ring_write(ring, 0xc000);
2487         radeon_ring_write(ring, 0xc000);
2488
2489         /* setup clear context state */
2490         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2491         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2492
2493         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2494         radeon_ring_write(ring, 0x80000000);
2495         radeon_ring_write(ring, 0x80000000);
2496
2497         for (i = 0; i < cik_default_size; i++)
2498                 radeon_ring_write(ring, cik_default_state[i]);
2499
2500         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2501         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2502
2503         /* set clear context state */
2504         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2505         radeon_ring_write(ring, 0);
2506
2507         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2508         radeon_ring_write(ring, 0x00000316);
2509         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2510         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2511
2512         radeon_ring_unlock_commit(rdev, ring);
2513
2514         return 0;
2515 }
2516
2517 /**
2518  * cik_cp_gfx_fini - stop the gfx ring
2519  *
2520  * @rdev: radeon_device pointer
2521  *
2522  * Stop the gfx ring and tear down the driver ring
2523  * info.
2524  */
2525 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2526 {
2527         cik_cp_gfx_enable(rdev, false);
2528         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2529 }
2530
2531 /**
2532  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2533  *
2534  * @rdev: radeon_device pointer
2535  *
2536  * Program the location and size of the gfx ring buffer
2537  * and test it to make sure it's working.
2538  * Returns 0 for success, error for failure.
2539  */
2540 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2541 {
2542         struct radeon_ring *ring;
2543         u32 tmp;
2544         u32 rb_bufsz;
2545         u64 rb_addr;
2546         int r;
2547
2548         WREG32(CP_SEM_WAIT_TIMER, 0x0);
2549         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2550
2551         /* Set the write pointer delay */
2552         WREG32(CP_RB_WPTR_DELAY, 0);
2553
2554         /* set the RB to use vmid 0 */
2555         WREG32(CP_RB_VMID, 0);
2556
2557         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2558
2559         /* ring 0 - compute and gfx */
2560         /* Set ring buffer size */
2561         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2562         rb_bufsz = drm_order(ring->ring_size / 8);
2563         tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2564 #ifdef __BIG_ENDIAN
2565         tmp |= BUF_SWAP_32BIT;
2566 #endif
2567         WREG32(CP_RB0_CNTL, tmp);
2568
2569         /* Initialize the ring buffer's read and write pointers */
2570         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2571         ring->wptr = 0;
2572         WREG32(CP_RB0_WPTR, ring->wptr);
2573
2574         /* set the wb address wether it's enabled or not */
2575         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2576         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2577
2578         /* scratch register shadowing is no longer supported */
2579         WREG32(SCRATCH_UMSK, 0);
2580
2581         if (!rdev->wb.enabled)
2582                 tmp |= RB_NO_UPDATE;
2583
2584         DRM_MDELAY(1);
2585         WREG32(CP_RB0_CNTL, tmp);
2586
2587         rb_addr = ring->gpu_addr >> 8;
2588         WREG32(CP_RB0_BASE, rb_addr);
2589         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2590
2591         ring->rptr = RREG32(CP_RB0_RPTR);
2592
2593         /* start the ring */
2594         cik_cp_gfx_start(rdev);
2595         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2596         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2597         if (r) {
2598                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2599                 return r;
2600         }
2601         return 0;
2602 }
2603
2604 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2605                               struct radeon_ring *ring)
2606 {
2607         u32 rptr;
2608
2609
2610
2611         if (rdev->wb.enabled) {
2612                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2613         } else {
2614                 spin_lock(&rdev->srbm_mutex);
2615                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2616                 rptr = RREG32(CP_HQD_PQ_RPTR);
2617                 cik_srbm_select(rdev, 0, 0, 0, 0);
2618                 spin_unlock(&rdev->srbm_mutex);
2619         }
2620         rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2621
2622         return rptr;
2623 }
2624
2625 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2626                               struct radeon_ring *ring)
2627 {
2628         u32 wptr;
2629
2630         if (rdev->wb.enabled) {
2631                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2632         } else {
2633                 spin_lock(&rdev->srbm_mutex);
2634                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2635                 wptr = RREG32(CP_HQD_PQ_WPTR);
2636                 cik_srbm_select(rdev, 0, 0, 0, 0);
2637                 spin_unlock(&rdev->srbm_mutex);
2638         }
2639         wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2640
2641         return wptr;
2642 }
2643
2644 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2645                                struct radeon_ring *ring)
2646 {
2647         u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2648
2649         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2650         WDOORBELL32(ring->doorbell_offset, wptr);
2651 }
2652
2653 /**
2654  * cik_cp_compute_enable - enable/disable the compute CP MEs
2655  *
2656  * @rdev: radeon_device pointer
2657  * @enable: enable or disable the MEs
2658  *
2659  * Halts or unhalts the compute MEs.
2660  */
2661 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2662 {
2663         if (enable)
2664                 WREG32(CP_MEC_CNTL, 0);
2665         else
2666                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2667         DRM_UDELAY(50);
2668 }
2669
2670 /**
2671  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2672  *
2673  * @rdev: radeon_device pointer
2674  *
2675  * Loads the compute MEC1&2 ucode.
2676  * Returns 0 for success, -EINVAL if the ucode is not available.
2677  */
2678 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2679 {
2680         const __be32 *fw_data;
2681         int i;
2682
2683         if (!rdev->mec_fw)
2684                 return -EINVAL;
2685
2686         cik_cp_compute_enable(rdev, false);
2687
2688         /* MEC1 */
2689         fw_data = (const __be32 *)rdev->mec_fw->data;
2690         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2691         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2692                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2693         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2694
2695         if (rdev->family == CHIP_KAVERI) {
2696                 /* MEC2 */
2697                 fw_data = (const __be32 *)rdev->mec_fw->data;
2698                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2699                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2700                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2701                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2702         }
2703
2704         return 0;
2705 }
2706
2707 /**
2708  * cik_cp_compute_start - start the compute queues
2709  *
2710  * @rdev: radeon_device pointer
2711  *
2712  * Enable the compute queues.
2713  * Returns 0 for success, error for failure.
2714  */
2715 static int cik_cp_compute_start(struct radeon_device *rdev)
2716 {
2717         cik_cp_compute_enable(rdev, true);
2718
2719         return 0;
2720 }
2721
2722 /**
2723  * cik_cp_compute_fini - stop the compute queues
2724  *
2725  * @rdev: radeon_device pointer
2726  *
2727  * Stop the compute queues and tear down the driver queue
2728  * info.
2729  */
2730 static void cik_cp_compute_fini(struct radeon_device *rdev)
2731 {
2732         int i, idx, r;
2733
2734         cik_cp_compute_enable(rdev, false);
2735
2736         for (i = 0; i < 2; i++) {
2737                 if (i == 0)
2738                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2739                 else
2740                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2741
2742                 if (rdev->ring[idx].mqd_obj) {
2743                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2744                         if (unlikely(r != 0))
2745                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2746
2747                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2748                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2749
2750                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2751                         rdev->ring[idx].mqd_obj = NULL;
2752                 }
2753         }
2754 }
2755
2756 static void cik_mec_fini(struct radeon_device *rdev)
2757 {
2758         int r;
2759
2760         if (rdev->mec.hpd_eop_obj) {
2761                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2762                 if (unlikely(r != 0))
2763                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2764                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2765                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2766
2767                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2768                 rdev->mec.hpd_eop_obj = NULL;
2769         }
2770 }
2771
2772 #define MEC_HPD_SIZE 2048
2773
2774 static int cik_mec_init(struct radeon_device *rdev)
2775 {
2776         int r;
2777         u32 *hpd;
2778
2779         /*
2780          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2781          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2782          */
2783         if (rdev->family == CHIP_KAVERI)
2784                 rdev->mec.num_mec = 2;
2785         else
2786                 rdev->mec.num_mec = 1;
2787         rdev->mec.num_pipe = 4;
2788         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2789
2790         if (rdev->mec.hpd_eop_obj == NULL) {
2791                 r = radeon_bo_create(rdev,
2792                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2793                                      PAGE_SIZE, true,
2794                                      RADEON_GEM_DOMAIN_GTT, NULL,
2795                                      &rdev->mec.hpd_eop_obj);
2796                 if (r) {
2797                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2798                         return r;
2799                 }
2800         }
2801
2802         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2803         if (unlikely(r != 0)) {
2804                 cik_mec_fini(rdev);
2805                 return r;
2806         }
2807         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2808                           &rdev->mec.hpd_eop_gpu_addr);
2809         if (r) {
2810                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2811                 cik_mec_fini(rdev);
2812                 return r;
2813         }
2814         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2815         if (r) {
2816                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2817                 cik_mec_fini(rdev);
2818                 return r;
2819         }
2820
2821         /* clear memory.  Not sure if this is required or not */
2822         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2823
2824         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2825         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2826
2827         return 0;
2828 }
2829
/*
 * Software copy of per-queue compute register values.
 *
 * The fields are named after the CP_MQD_* / CP_HQD_* registers.  For
 * the registers it programs, cik_cp_compute_resume() stores the value
 * in the matching field here and then writes that same value to the
 * register.  This struct is embedded in struct bonaire_mqd as
 * queue_state.
 *
 * NOTE(review): the field order presumably has to match a layout
 * expected by the hardware/firmware - confirm before reordering.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
2868
/*
 * Contents of the MQD buffer object of a compute ring.
 *
 * cik_cp_compute_resume() creates one buffer object of
 * sizeof(struct bonaire_mqd) per compute ring, zeroes it, and then
 * fills in header, static_thread_mgmt01/23 and queue_state.  The GPU
 * address of the buffer is what gets written to CP_MQD_BASE_ADDR/_HI.
 *
 * NOTE(review): the field order presumably has to match a layout
 * expected by the hardware/firmware - confirm before reordering.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* per-queue register values, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
2896
2897 /**
2898  * cik_cp_compute_resume - setup the compute queue registers
2899  *
2900  * @rdev: radeon_device pointer
2901  *
2902  * Program the compute queues and test them to make sure they
2903  * are working.
2904  * Returns 0 for success, error for failure.
2905  */
2906 static int cik_cp_compute_resume(struct radeon_device *rdev)
2907 {
2908         int r, i, idx;
2909         u32 tmp;
2910         bool use_doorbell = true;
2911         u64 hqd_gpu_addr;
2912         u64 mqd_gpu_addr;
2913         u64 eop_gpu_addr;
2914         u64 wb_gpu_addr;
2915         u32 *buf;
2916         struct bonaire_mqd *mqd;
2917
2918         r = cik_cp_compute_start(rdev);
2919         if (r)
2920                 return r;
2921
2922         /* fix up chicken bits */
2923         tmp = RREG32(CP_CPF_DEBUG);
2924         tmp |= (1 << 23);
2925         WREG32(CP_CPF_DEBUG, tmp);
2926
2927         /* init the pipes */
2928         spin_lock(&rdev->srbm_mutex);
2929         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2930                 int me = (i < 4) ? 1 : 2;
2931                 int pipe = (i < 4) ? i : (i - 4);
2932
2933                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2934
2935                 cik_srbm_select(rdev, me, pipe, 0, 0);
2936
2937                 /* write the EOP addr */
2938                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2939                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2940
2941                 /* set the VMID assigned */
2942                 WREG32(CP_HPD_EOP_VMID, 0);
2943
2944                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2945                 tmp = RREG32(CP_HPD_EOP_CONTROL);
2946                 tmp &= ~EOP_SIZE_MASK;
2947                 tmp |= drm_order(MEC_HPD_SIZE / 8);
2948                 WREG32(CP_HPD_EOP_CONTROL, tmp);
2949         }
2950         cik_srbm_select(rdev, 0, 0, 0, 0);
2951         spin_unlock(&rdev->srbm_mutex);
2952
2953         /* init the queues.  Just two for now. */
2954         for (i = 0; i < 2; i++) {
2955                 if (i == 0)
2956                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
2957                 else
2958                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
2959
2960                 if (rdev->ring[idx].mqd_obj == NULL) {
2961                         r = radeon_bo_create(rdev,
2962                                              sizeof(struct bonaire_mqd),
2963                                              PAGE_SIZE, true,
2964                                              RADEON_GEM_DOMAIN_GTT, NULL,
2965                                              &rdev->ring[idx].mqd_obj);
2966                         if (r) {
2967                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2968                                 return r;
2969                         }
2970                 }
2971
2972                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2973                 if (unlikely(r != 0)) {
2974                         cik_cp_compute_fini(rdev);
2975                         return r;
2976                 }
2977                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2978                                   &mqd_gpu_addr);
2979                 if (r) {
2980                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2981                         cik_cp_compute_fini(rdev);
2982                         return r;
2983                 }
2984                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2985                 if (r) {
2986                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2987                         cik_cp_compute_fini(rdev);
2988                         return r;
2989                 }
2990
2991                 /* doorbell offset */
2992                 rdev->ring[idx].doorbell_offset =
2993                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2994
2995                 /* init the mqd struct */
2996                 memset(buf, 0, sizeof(struct bonaire_mqd));
2997
2998                 mqd = (struct bonaire_mqd *)buf;
2999                 mqd->header = 0xC0310800;
3000                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3001                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3002                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3003                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3004
3005                 spin_lock(&rdev->srbm_mutex);
3006                 cik_srbm_select(rdev, rdev->ring[idx].me,
3007                                 rdev->ring[idx].pipe,
3008                                 rdev->ring[idx].queue, 0);
3009
3010                 /* disable wptr polling */
3011                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3012                 tmp &= ~WPTR_POLL_EN;
3013                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3014
3015                 /* enable doorbell? */
3016                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3017                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3018                 if (use_doorbell)
3019                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3020                 else
3021                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3022                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3023                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3024
3025                 /* disable the queue if it's active */
3026                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3027                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3028                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3029                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3030                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3031                         for (i = 0; i < rdev->usec_timeout; i++) {
3032                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3033                                         break;
3034                                 DRM_UDELAY(1);
3035                         }
3036                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3037                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3038                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3039                 }
3040
3041                 /* set the pointer to the MQD */
3042                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3043                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3044                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3045                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3046                 /* set MQD vmid to 0 */
3047                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3048                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3049                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3050
3051                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3052                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3053                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3054                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3055                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3056                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3057
3058                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3059                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3060                 mqd->queue_state.cp_hqd_pq_control &=
3061                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3062
3063                 mqd->queue_state.cp_hqd_pq_control |=
3064                         drm_order(rdev->ring[idx].ring_size / 8);
3065                 mqd->queue_state.cp_hqd_pq_control |=
3066                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3067 #ifdef __BIG_ENDIAN
3068                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3069 #endif
3070                 mqd->queue_state.cp_hqd_pq_control &=
3071                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3072                 mqd->queue_state.cp_hqd_pq_control |=
3073                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3074                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3075
3076                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3077                 if (i == 0)
3078                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3079                 else
3080                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3081                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3082                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3083                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3084                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3085                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3086
3087                 /* set the wb address wether it's enabled or not */
3088                 if (i == 0)
3089                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3090                 else
3091                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3092                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3093                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3094                         upper_32_bits(wb_gpu_addr) & 0xffff;
3095                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3096                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3097                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3098                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3099
3100                 /* enable the doorbell if requested */
3101                 if (use_doorbell) {
3102                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3103                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3104                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3105                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3106                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3107                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3108                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3109                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3110
3111                 } else {
3112                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3113                 }
3114                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3115                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3116
3117                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3118                 rdev->ring[idx].wptr = 0;
3119                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3120                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3121                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3122                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3123
3124                 /* set the vmid for the queue */
3125                 mqd->queue_state.cp_hqd_vmid = 0;
3126                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3127
3128                 /* activate the queue */
3129                 mqd->queue_state.cp_hqd_active = 1;
3130                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3131
3132                 cik_srbm_select(rdev, 0, 0, 0, 0);
3133                 spin_unlock(&rdev->srbm_mutex);
3134
3135                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3136                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3137
3138                 rdev->ring[idx].ready = true;
3139                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3140                 if (r)
3141                         rdev->ring[idx].ready = false;
3142         }
3143
3144         return 0;
3145 }
3146
3147 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3148 {
3149         cik_cp_gfx_enable(rdev, enable);
3150         cik_cp_compute_enable(rdev, enable);
3151 }
3152
/*
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * Loads the gfx microcode first; the compute microcode is only loaded
 * if that succeeded.  Returns 0 for success or the first error.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
3166
/*
 * cik_cp_fini - tear down both command processors
 *
 * Shuts down the gfx ring first, then the compute queues.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* gfx first */
	cik_cp_gfx_fini(rdev);

	/* then compute */
	cik_cp_compute_fini(rdev);
}
3172
3173 static int cik_cp_resume(struct radeon_device *rdev)
3174 {
3175         int r;
3176
3177         /* Reset all cp blocks */
3178         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3179         RREG32(GRBM_SOFT_RESET);
3180         DRM_MDELAY(15);
3181         WREG32(GRBM_SOFT_RESET, 0);
3182         RREG32(GRBM_SOFT_RESET);
3183
3184         r = cik_cp_load_microcode(rdev);
3185         if (r)
3186                 return r;
3187
3188         r = cik_cp_gfx_resume(rdev);
3189         if (r)
3190                 return r;
3191         r = cik_cp_compute_resume(rdev);
3192         if (r)
3193                 return r;
3194
3195         return 0;
3196 }
3197
3198 /*
3199  * sDMA - System DMA
3200  * Starting with CIK, the GPU has new asynchronous
3201  * DMA engines.  These engines are used for compute
3202  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3203  * and each one supports 1 ring buffer used for gfx
3204  * and 2 queues used for compute.
3205  *
3206  * The programming model is very similar to the CP
3207  * (ring buffer, IBs, etc.), but sDMA has its own
3208  * packet format that is different from the PM4 format
3209  * used by the CP. sDMA supports copying data, writing
3210  * embedded data, solid fills, and a number of other
3211  * things.  It also has support for tiling/detiling of
3212  * buffers.
3213  */
3214 /**
3215  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3216  *
3217  * @rdev: radeon_device pointer
3218  * @ib: IB object to schedule
3219  *
3220  * Schedule an IB in the DMA ring (CIK).
3221  */
3222 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3223                               struct radeon_ib *ib)
3224 {
3225         struct radeon_ring *ring = &rdev->ring[ib->ring];
3226         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3227
3228         if (rdev->wb.enabled) {
3229                 u32 next_rptr = ring->wptr + 5;
3230                 while ((next_rptr & 7) != 4)
3231                         next_rptr++;
3232                 next_rptr += 4;
3233                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3234                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3235                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3236                 radeon_ring_write(ring, 1); /* number of DWs to follow */
3237                 radeon_ring_write(ring, next_rptr);
3238         }
3239
3240         /* IB packet must end on a 8 DW boundary */
3241         while ((ring->wptr & 7) != 4)
3242                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3243         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3244         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3245         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3246         radeon_ring_write(ring, ib->length_dw);
3247
3248 }
3249
3250 /**
3251  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3252  *
3253  * @rdev: radeon_device pointer
3254  * @fence: radeon fence object
3255  *
3256  * Add a DMA fence packet to the ring to write
3257  * the fence seq number and DMA trap packet to generate
3258  * an interrupt if needed (CIK).
3259  */
3260 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3261                               struct radeon_fence *fence)
3262 {
3263         struct radeon_ring *ring = &rdev->ring[fence->ring];
3264         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3265         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3266                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3267         u32 ref_and_mask;
3268
3269         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3270                 ref_and_mask = SDMA0;
3271         else
3272                 ref_and_mask = SDMA1;
3273
3274         /* write the fence */
3275         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3276         radeon_ring_write(ring, addr & 0xffffffff);
3277         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3278         radeon_ring_write(ring, fence->seq);
3279         /* generate an interrupt */
3280         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3281         /* flush HDP */
3282         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3283         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3284         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3285         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3286         radeon_ring_write(ring, ref_and_mask); /* MASK */
3287         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3288 }
3289
3290 /**
3291  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3292  *
3293  * @rdev: radeon_device pointer
3294  * @ring: radeon_ring structure holding ring information
3295  * @semaphore: radeon semaphore object
3296  * @emit_wait: wait or signal semaphore
3297  *
3298  * Add a DMA semaphore packet to the ring wait on or signal
3299  * other rings (CIK).
3300  */
3301 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3302                                   struct radeon_ring *ring,
3303                                   struct radeon_semaphore *semaphore,
3304                                   bool emit_wait)
3305 {
3306         u64 addr = semaphore->gpu_addr;
3307         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3308
3309         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3310         radeon_ring_write(ring, addr & 0xfffffff8);
3311         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3312 }
3313
3314 /**
3315  * cik_sdma_gfx_stop - stop the gfx async dma engines
3316  *
3317  * @rdev: radeon_device pointer
3318  *
3319  * Stop the gfx async dma ring buffers (CIK).
3320  */
3321 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3322 {
3323         u32 rb_cntl, reg_offset;
3324         int i;
3325
3326         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3327
3328         for (i = 0; i < 2; i++) {
3329                 if (i == 0)
3330                         reg_offset = SDMA0_REGISTER_OFFSET;
3331                 else
3332                         reg_offset = SDMA1_REGISTER_OFFSET;
3333                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3334                 rb_cntl &= ~SDMA_RB_ENABLE;
3335                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3336                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3337         }
3338 }
3339
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Intended to stop the compute async dma queues (CIK).
 * Not implemented yet: the function currently does nothing.
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}
3351
3352 /**
3353  * cik_sdma_enable - stop the async dma engines
3354  *
3355  * @rdev: radeon_device pointer
3356  * @enable: enable/disable the DMA MEs.
3357  *
3358  * Halt or unhalt the async dma engines (CIK).
3359  */
3360 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3361 {
3362         u32 me_cntl, reg_offset;
3363         int i;
3364
3365         for (i = 0; i < 2; i++) {
3366                 if (i == 0)
3367                         reg_offset = SDMA0_REGISTER_OFFSET;
3368                 else
3369                         reg_offset = SDMA1_REGISTER_OFFSET;
3370                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3371                 if (enable)
3372                         me_cntl &= ~SDMA_HALT;
3373                 else
3374                         me_cntl |= SDMA_HALT;
3375                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3376         }
3377 }
3378
3379 /**
3380  * cik_sdma_gfx_resume - setup and start the async dma engines
3381  *
3382  * @rdev: radeon_device pointer
3383  *
3384  * Set up the gfx DMA ring buffers and enable them (CIK).
3385  * Returns 0 for success, error for failure.
3386  */
3387 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3388 {
3389         struct radeon_ring *ring;
3390         u32 rb_cntl, ib_cntl;
3391         u32 rb_bufsz;
3392         u32 reg_offset, wb_offset;
3393         int i, r;
3394
3395         for (i = 0; i < 2; i++) {
3396                 if (i == 0) {
3397                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3398                         reg_offset = SDMA0_REGISTER_OFFSET;
3399                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
3400                 } else {
3401                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3402                         reg_offset = SDMA1_REGISTER_OFFSET;
3403                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3404                 }
3405
3406                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3407                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3408
3409                 /* Set ring buffer size in dwords */
3410                 rb_bufsz = drm_order(ring->ring_size / 4);
3411                 rb_cntl = rb_bufsz << 1;
3412 #ifdef __BIG_ENDIAN
3413                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3414 #endif
3415                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3416
3417                 /* Initialize the ring buffer's read and write pointers */
3418                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3419                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3420
3421                 /* set the wb address whether it's enabled or not */
3422                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3423                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3424                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3425                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3426
3427                 if (rdev->wb.enabled)
3428                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3429
3430