45f6c36d0e2d2f645095b407b25ab72fb07bc1d3
[dragonfly.git] / sys / dev / drm / radeon / ni.c
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  * $FreeBSD: head/sys/dev/drm2/radeon/ni.c 254885 2013-08-25 19:37:15Z dumbbell $
24  */
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <uapi_drm/radeon_drm.h>
31 #include "nid.h"
32 #include "atom.h"
33 #include "ni_reg.h"
34 #include "cayman_blit_shaders.h"
35 #include "radeon_ucode.h"
36 #include "clearstate_cayman.h"
37
/*
 * TN/ARUBA RLC save/restore register list: GFX register offsets whose
 * contents are preserved across RLC-driven power gating.  The companion
 * tn_rlc_save_restore_register_list_size below carries the element count.
 * NOTE(review): left non-const to match the pointer type of the consumer
 * elsewhere in the driver — confirm before constifying.
 */
static u32 tn_rlc_save_restore_register_list[] =
{
        0x98fc,
        0x98f0,
        0x9834,
        0x9838,
        0x9870,
        0x9874,
        0x8a14,
        0x8b24,
        0x8bcc,
        0x8b10,
        0x8c30,
        0x8d00,
        0x8d04,
        0x8c00,
        0x8c04,
        0x8c10,
        0x8c14,
        0x8d8c,
        0x8cf0,
        0x8e38,
        0x9508,
        0x9688,
        0x9608,
        0x960c,
        0x9610,
        0x9614,
        0x88c4,
        0x8978,
        0x88d4,
        0x900c,
        0x9100,
        0x913c,
        0x90e8,
        0x9354,
        0xa008,
        0x98f8,
        0x9148,
        0x914c,
        0x3f94,
        0x98f4,
        0x9b7c,
        0x3f8c,
        0x8950,
        0x8954,
        0x8a18,
        0x8b28,
        0x9144,
        0x3f90,
        0x915c,
        0x9160,
        0x9178,
        0x917c,
        0x9180,
        0x918c,
        0x9190,
        0x9194,
        0x9198,
        0x919c,
        0x91a8,
        0x91ac,
        0x91b0,
        0x91b4,
        0x91b8,
        0x91c4,
        0x91c8,
        0x91cc,
        0x91d0,
        0x91d4,
        0x91e0,
        0x91e4,
        0x91ec,
        0x91f0,
        0x91f4,
        0x9200,
        0x9204,
        0x929c,
        0x8030,
        0x9150,
        0x9a60,
        0x920c,
        0x9210,
        0x9228,
        0x922c,
        0x9244,
        0x9248,
        0x91e8,
        0x9294,
        0x9208,
        0x9224,
        0x9240,
        0x9220,
        0x923c,
        0x9258,
        0x9744,
        0xa200,
        0xa204,
        0xa208,
        0xa20c,
        0x8d58,
        0x9030,
        0x9034,
        0x9038,
        0x903c,
        0x9040,
        0x9654,
        0x897c,
        0xa210,
        0xa214,
        0x9868,
        0xa02c,
        0x9664,
        0x9698,
        0x949c,
        0x8e10,
        0x8e18,
        0x8c50,
        0x8c58,
        0x8c60,
        0x8c68,
        0x89b4,
        0x9830,
        0x802c,
};
/* element count of the list above, consumed together with the list */
static u32 tn_rlc_save_restore_register_list_size = ARRAY_SIZE(tn_rlc_save_restore_register_list);
164
/* Firmware Names - ucode images fetched by ni_init_microcode().
 * The BTC parts (BARTS/TURKS/CAICOS) share a single "BTC" RLC image;
 * ARUBA (an IGP) has no MC or SMC image. */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BARTS_smc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/TURKS_smc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
MODULE_FIRMWARE("radeon/ARUBA_me.bin");
MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
187
/* Cayman golden settings, second pass: triples of
 * { register offset, mask, value } consumed by
 * radeon_program_register_sequence() in ni_init_golden_registers(). */
static const u32 cayman_golden_registers2[] =
{
        0x3e5c, 0xffffffff, 0x00000000,
        0x3e48, 0xffffffff, 0x00000000,
        0x3e4c, 0xffffffff, 0x00000000,
        0x3e64, 0xffffffff, 0x00000000,
        0x3e50, 0xffffffff, 0x00000000,
        0x3e60, 0xffffffff, 0x00000000
};
197
/* Cayman golden settings: triples of { register offset, mask, value }
 * consumed by radeon_program_register_sequence() in
 * ni_init_golden_registers(). */
static const u32 cayman_golden_registers[] =
{
        0x5eb4, 0xffffffff, 0x00000002,
        0x5e78, 0x8f311ff1, 0x001000f0,
        0x3f90, 0xffff0000, 0xff000000,
        0x9148, 0xffff0000, 0xff000000,
        0x3f94, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0xc78, 0x00000080, 0x00000080,
        0xbd4, 0x70073777, 0x00011003,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd0b8, 0x73773777, 0x02011003,
        0x5bc0, 0x00200000, 0x50100000,
        0x98f8, 0x33773777, 0x02011003,
        0x98fc, 0xffffffff, 0x76541032,
        0x7030, 0x31000311, 0x00000011,
        0x2f48, 0x33773777, 0x42010001,
        0x6b28, 0x00000010, 0x00000012,
        0x7728, 0x00000010, 0x00000012,
        0x10328, 0x00000010, 0x00000012,
        0x10f28, 0x00000010, 0x00000012,
        0x11b28, 0x00000010, 0x00000012,
        0x12728, 0x00000010, 0x00000012,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x10c, 0x00000001, 0x00010003,
        0xa02c, 0xffffffff, 0x0000009b,
        0x913c, 0x0000010f, 0x01000100,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x28350, 0x00000f01, 0x00000000,
        0x9508, 0x3700001f, 0x00000002,
        0x960c, 0xffffffff, 0x54763210,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d0, 0xffffffff, 0x0f40df40,
        0x88d4, 0x0000001f, 0x00000010,
        0x8974, 0xffffffff, 0x00000000
};
238
/* DVST (Trinity) golden settings, second pass: { offset, mask, value }
 * triples for radeon_program_register_sequence(). */
static const u32 dvst_golden_registers2[] =
{
        0x8f8, 0xffffffff, 0,
        0x8fc, 0x00380000, 0,
        0x8f8, 0xffffffff, 1,
        0x8fc, 0x0e000000, 0
};
246
/* DVST (Trinity) golden settings: { offset, mask, value } triples for
 * radeon_program_register_sequence(); selected by PCI device ID in
 * ni_init_golden_registers(). */
static const u32 dvst_golden_registers[] =
{
        0x690, 0x3fff3fff, 0x20c00033,
        0x918c, 0x0fff0fff, 0x00010006,
        0x91a8, 0x0fff0fff, 0x00010006,
        0x9150, 0xffffdfff, 0x6e944040,
        0x917c, 0x0fff0fff, 0x00030002,
        0x9198, 0x0fff0fff, 0x00030002,
        0x915c, 0x0fff0fff, 0x00010000,
        0x3f90, 0xffff0001, 0xff000000,
        0x9178, 0x0fff0fff, 0x00070000,
        0x9194, 0x0fff0fff, 0x00070000,
        0x9148, 0xffff0001, 0xff000000,
        0x9190, 0x0fff0fff, 0x00090008,
        0x91ac, 0x0fff0fff, 0x00090008,
        0x3f94, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0x929c, 0x00000fff, 0x00000001,
        0x55e4, 0xff607fff, 0xfc000100,
        0x8a18, 0xff000fff, 0x00000100,
        0x8b28, 0xff000fff, 0x00000100,
        0x9144, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffe, 0x00000000,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd0b8, 0x73773777, 0x12010001,
        0x5bb0, 0x000000f0, 0x00000070,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x00030000, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xa008, 0xffffffff, 0x00010000,
        0x913c, 0xffff03ff, 0x01000100,
        0x8c00, 0x000000ff, 0x00000003,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x8cf0, 0x1fff1fff, 0x08e00410,
        0x28350, 0x00000f01, 0x00000000,
        0x9508, 0xf700071f, 0x00000002,
        0x960c, 0xffffffff, 0x54763210,
        0x20ef8, 0x01ff01ff, 0x00000002,
        0x20e98, 0xfffffbff, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x8978, 0x3fffffff, 0x04050140,
        0x88d4, 0x0000001f, 0x00000010,
        0x8974, 0xffffffff, 0x00000000
};
304
/* Scrapper golden settings (ARUBA parts not in the DVST ID list):
 * { offset, mask, value } triples for radeon_program_register_sequence().
 * NOTE(review): most rows appear twice back-to-back; this duplication is
 * inherited from the original table — presumably intentional (repeat
 * writes), verify against the upstream driver before de-duplicating. */
static const u32 scrapper_golden_registers[] =
{
        0x690, 0x3fff3fff, 0x20c00033,
        0x918c, 0x0fff0fff, 0x00010006,
        0x918c, 0x0fff0fff, 0x00010006,
        0x91a8, 0x0fff0fff, 0x00010006,
        0x91a8, 0x0fff0fff, 0x00010006,
        0x9150, 0xffffdfff, 0x6e944040,
        0x9150, 0xffffdfff, 0x6e944040,
        0x917c, 0x0fff0fff, 0x00030002,
        0x917c, 0x0fff0fff, 0x00030002,
        0x9198, 0x0fff0fff, 0x00030002,
        0x9198, 0x0fff0fff, 0x00030002,
        0x915c, 0x0fff0fff, 0x00010000,
        0x915c, 0x0fff0fff, 0x00010000,
        0x3f90, 0xffff0001, 0xff000000,
        0x3f90, 0xffff0001, 0xff000000,
        0x9178, 0x0fff0fff, 0x00070000,
        0x9178, 0x0fff0fff, 0x00070000,
        0x9194, 0x0fff0fff, 0x00070000,
        0x9194, 0x0fff0fff, 0x00070000,
        0x9148, 0xffff0001, 0xff000000,
        0x9148, 0xffff0001, 0xff000000,
        0x9190, 0x0fff0fff, 0x00090008,
        0x9190, 0x0fff0fff, 0x00090008,
        0x91ac, 0x0fff0fff, 0x00090008,
        0x91ac, 0x0fff0fff, 0x00090008,
        0x3f94, 0xffff0000, 0xff000000,
        0x3f94, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0x929c, 0x00000fff, 0x00000001,
        0x929c, 0x00000fff, 0x00000001,
        0x55e4, 0xff607fff, 0xfc000100,
        0x8a18, 0xff000fff, 0x00000100,
        0x8a18, 0xff000fff, 0x00000100,
        0x8b28, 0xff000fff, 0x00000100,
        0x8b28, 0xff000fff, 0x00000100,
        0x9144, 0xfffc0fff, 0x00000100,
        0x9144, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffe, 0x00000000,
        0x9838, 0xfffffffe, 0x00000000,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd0b8, 0x73773777, 0x12010001,
        0xd0b8, 0x73773777, 0x12010001,
        0x5bb0, 0x000000f0, 0x00000070,
        0x98f8, 0x73773777, 0x12010001,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x00030000, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x4d8, 0x00000fff, 0x00000100,
        0xa008, 0xffffffff, 0x00010000,
        0xa008, 0xffffffff, 0x00010000,
        0x913c, 0xffff03ff, 0x01000100,
        0x913c, 0xffff03ff, 0x01000100,
        0x90e8, 0x001fffff, 0x010400c0,
        0x8c00, 0x000000ff, 0x00000003,
        0x8c00, 0x000000ff, 0x00000003,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x8c30, 0x0000000f, 0x00040005,
        0x8cf0, 0x1fff1fff, 0x08e00410,
        0x8cf0, 0x1fff1fff, 0x08e00410,
        0x900c, 0x00ffffff, 0x0017071f,
        0x28350, 0x00000f01, 0x00000000,
        0x28350, 0x00000f01, 0x00000000,
        0x9508, 0xf700071f, 0x00000002,
        0x9508, 0xf700071f, 0x00000002,
        0x9688, 0x00300000, 0x0017000f,
        0x960c, 0xffffffff, 0x54763210,
        0x960c, 0xffffffff, 0x54763210,
        0x20ef8, 0x01ff01ff, 0x00000002,
        0x20e98, 0xfffffbff, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x8978, 0x3fffffff, 0x04050140,
        0x8978, 0x3fffffff, 0x04050140,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x8974, 0xffffffff, 0x00000000,
        0x8974, 0xffffffff, 0x00000000
};
411
412 static void ni_init_golden_registers(struct radeon_device *rdev)
413 {
414         switch (rdev->family) {
415         case CHIP_CAYMAN:
416                 radeon_program_register_sequence(rdev,
417                                                  cayman_golden_registers,
418                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
419                 radeon_program_register_sequence(rdev,
420                                                  cayman_golden_registers2,
421                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
422                 break;
423         case CHIP_ARUBA:
424                 if ((rdev->ddev->pci_device == 0x9900) ||
425                     (rdev->ddev->pci_device == 0x9901) ||
426                     (rdev->ddev->pci_device == 0x9903) ||
427                     (rdev->ddev->pci_device == 0x9904) ||
428                     (rdev->ddev->pci_device == 0x9905) ||
429                     (rdev->ddev->pci_device == 0x9906) ||
430                     (rdev->ddev->pci_device == 0x9907) ||
431                     (rdev->ddev->pci_device == 0x9908) ||
432                     (rdev->ddev->pci_device == 0x9909) ||
433                     (rdev->ddev->pci_device == 0x990A) ||
434                     (rdev->ddev->pci_device == 0x990B) ||
435                     (rdev->ddev->pci_device == 0x990C) ||
436                     (rdev->ddev->pci_device == 0x990D) ||
437                     (rdev->ddev->pci_device == 0x990E) ||
438                     (rdev->ddev->pci_device == 0x990F) ||
439                     (rdev->ddev->pci_device == 0x9910) ||
440                     (rdev->ddev->pci_device == 0x9913) ||
441                     (rdev->ddev->pci_device == 0x9917) ||
442                     (rdev->ddev->pci_device == 0x9918)) {
443                         radeon_program_register_sequence(rdev,
444                                                          dvst_golden_registers,
445                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
446                         radeon_program_register_sequence(rdev,
447                                                          dvst_golden_registers2,
448                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
449                 } else {
450                         radeon_program_register_sequence(rdev,
451                                                          scrapper_golden_registers,
452                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
453                         radeon_program_register_sequence(rdev,
454                                                          dvst_golden_registers2,
455                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
456                 }
457                 break;
458         default:
459                 break;
460         }
461 }
462
/* number of { MC io debug index, data } pairs in each *_io_mc_regs table */
#define BTC_IO_MC_REGS_SIZE 29

/* BARTS MC io register setup: pairs written to MC_SEQ_IO_DEBUG_INDEX /
 * MC_SEQ_IO_DEBUG_DATA by ni_mc_load_microcode() before the MC ucode. */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00946a00}
};
496
/* TURKS MC io register setup: same layout as barts_io_mc_regs; only the
 * final (0x9f) entry differs between the BTC parts. */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00936a00}
};
528
/* CAICOS MC io register setup; see barts_io_mc_regs for usage. */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00916a00}
};
560
/* CAYMAN MC io register setup; see barts_io_mc_regs for usage. */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00976b00}
};
592
593 int ni_mc_load_microcode(struct radeon_device *rdev)
594 {
595         const __be32 *fw_data;
596         u32 mem_type, running, blackout = 0;
597         u32 *io_mc_regs;
598         int i, ucode_size, regs_size;
599
600         if (!rdev->mc_fw)
601                 return -EINVAL;
602
603         switch (rdev->family) {
604         case CHIP_BARTS:
605                 io_mc_regs = (u32 *)&barts_io_mc_regs;
606                 ucode_size = BTC_MC_UCODE_SIZE;
607                 regs_size = BTC_IO_MC_REGS_SIZE;
608                 break;
609         case CHIP_TURKS:
610                 io_mc_regs = (u32 *)&turks_io_mc_regs;
611                 ucode_size = BTC_MC_UCODE_SIZE;
612                 regs_size = BTC_IO_MC_REGS_SIZE;
613                 break;
614         case CHIP_CAICOS:
615         default:
616                 io_mc_regs = (u32 *)&caicos_io_mc_regs;
617                 ucode_size = BTC_MC_UCODE_SIZE;
618                 regs_size = BTC_IO_MC_REGS_SIZE;
619                 break;
620         case CHIP_CAYMAN:
621                 io_mc_regs = (u32 *)&cayman_io_mc_regs;
622                 ucode_size = CAYMAN_MC_UCODE_SIZE;
623                 regs_size = BTC_IO_MC_REGS_SIZE;
624                 break;
625         }
626
627         mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
628         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
629
630         if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
631                 if (running) {
632                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
633                         WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
634                 }
635
636                 /* reset the engine and set to writable */
637                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
638                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
639
640                 /* load mc io regs */
641                 for (i = 0; i < regs_size; i++) {
642                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
643                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
644                 }
645                 /* load the MC ucode */
646                 fw_data = (const __be32 *)rdev->mc_fw->data;
647                 for (i = 0; i < ucode_size; i++)
648                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
649
650                 /* put the engine back into the active state */
651                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
652                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
653                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
654
655                 /* wait for training to complete */
656                 for (i = 0; i < rdev->usec_timeout; i++) {
657                         if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
658                                 break;
659                         DRM_UDELAY(1);
660                 }
661
662                 if (running)
663                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
664         }
665
666         return 0;
667 }
668
669 int ni_init_microcode(struct radeon_device *rdev)
670 {
671         const char *chip_name;
672         const char *rlc_chip_name;
673         size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
674         size_t smc_req_size = 0;
675         char fw_name[30];
676         int err;
677
678         DRM_DEBUG("\n");
679
680         switch (rdev->family) {
681         case CHIP_BARTS:
682                 chip_name = "BARTS";
683                 rlc_chip_name = "BTC";
684                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
685                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
686                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
687                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
688                 smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
689                 break;
690         case CHIP_TURKS:
691                 chip_name = "TURKS";
692                 rlc_chip_name = "BTC";
693                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
694                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
695                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
696                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
697                 smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
698                 break;
699         case CHIP_CAICOS:
700                 chip_name = "CAICOS";
701                 rlc_chip_name = "BTC";
702                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
703                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
704                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
705                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
706                 smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
707                 break;
708         case CHIP_CAYMAN:
709                 chip_name = "CAYMAN";
710                 rlc_chip_name = "CAYMAN";
711                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
712                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
713                 rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
714                 mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
715                 smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
716                 break;
717         case CHIP_ARUBA:
718                 chip_name = "ARUBA";
719                 rlc_chip_name = "ARUBA";
720                 /* pfp/me same size as CAYMAN */
721                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
722                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
723                 rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
724                 mc_req_size = 0;
725                 break;
726         default: panic("%s: Unsupported family %d", __func__, rdev->family);
727         }
728
729         DRM_INFO("Loading %s Microcode\n", chip_name);
730
731         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
732         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
733         if (err)
734                 goto out;
735         if (rdev->pfp_fw->datasize != pfp_req_size) {
736                 DRM_ERROR(
737                        "ni_pfp: Bogus length %zu in firmware \"%s\"\n",
738                        rdev->pfp_fw->datasize, fw_name);
739                 err = -EINVAL;
740                 goto out;
741         }
742
743         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
744         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
745         if (err)
746                 goto out;
747         if (rdev->me_fw->datasize != me_req_size) {
748                 DRM_ERROR(
749                        "ni_me: Bogus length %zu in firmware \"%s\"\n",
750                        rdev->me_fw->datasize, fw_name);
751                 err = -EINVAL;
752         }
753
754         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc",
755                   rlc_chip_name);
756         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
757         if (err)
758                 goto out;
759         if (rdev->rlc_fw->datasize != rlc_req_size) {
760                 DRM_ERROR(
761                        "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
762                        rdev->rlc_fw->datasize, fw_name);
763                 err = -EINVAL;
764         }
765
766         /* no MC ucode on TN */
767         if (!(rdev->flags & RADEON_IS_IGP)) {
768                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
769                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
770                 if (err)
771                         goto out;
772                 if (rdev->mc_fw->datasize != mc_req_size) {
773                         DRM_ERROR(
774                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
775                                rdev->mc_fw->datasize, fw_name);
776                         err = -EINVAL;
777                 }
778         }
779
780         if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
781                 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
782                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
783                 if (err) {
784                         printk(KERN_ERR
785                                "smc: error loading firmware \"%s\"\n",
786                                fw_name);
787                         release_firmware(rdev->smc_fw);
788                         rdev->smc_fw = NULL;
789                 } else if (rdev->smc_fw->datasize != smc_req_size) {
790                         printk(KERN_ERR
791                                "ni_smc: Bogus length %zu in firmware \"%s\"\n",
792                                rdev->smc_fw->datasize, fw_name);
793                         err = -EINVAL;
794                 }
795         }
796
797 out:
798         if (err) {
799                 if (err != -EINVAL)
800                         DRM_ERROR(
801                                "ni_cp: Failed to load firmware \"%s\"\n",
802                                fw_name);
803                 release_firmware(rdev->pfp_fw);
804                 rdev->pfp_fw = NULL;
805                 release_firmware(rdev->me_fw);
806                 rdev->me_fw = NULL;
807                 release_firmware(rdev->rlc_fw);
808                 rdev->rlc_fw = NULL;
809                 release_firmware(rdev->mc_fw);
810                 rdev->mc_fw = NULL;
811                 release_firmware(rdev->smc_fw);
812                 rdev->smc_fw = NULL;
813         }
814         return err;
815 }
816
817 /**
818  * ni_fini_microcode - drop the firmwares image references
819  *
820  * @rdev: radeon_device pointer
821  *
822  * Drop the pfp, me, mc and rlc firmwares image references.
823  * Called at driver shutdown.
824  */
825 void ni_fini_microcode(struct radeon_device *rdev)
826 {
827         release_firmware(rdev->pfp_fw);
828         rdev->pfp_fw = NULL;
829         release_firmware(rdev->me_fw);
830         rdev->me_fw = NULL;
831         release_firmware(rdev->rlc_fw);
832         rdev->rlc_fw = NULL;
833         release_firmware(rdev->mc_fw);
834         rdev->mc_fw = NULL;
835 }
836
837 int tn_get_temp(struct radeon_device *rdev)
838 {
839         u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
840         int actual_temp = (temp / 8) - 49;
841
842         return actual_temp * 1000;
843 }
844
845 /*
846  * Core functions
847  */
/*
 * cayman_gpu_init - bring up the 3D engine on Cayman/Aruba (TN) parts
 *
 * @rdev: radeon_device pointer
 *
 * Fills in rdev->config.cayman with the per-family shader/backend limits,
 * derives the tiling configuration from GB_ADDR_CONFIG and MC_ARB_RAMCFG,
 * masks off disabled render backends and texture channel caches, and
 * programs HW defaults for the 3D engine.  Order of the register writes
 * below follows the hardware init sequence and must not be changed.
 */
static void cayman_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = 0;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 cgts_tcc_disable;
        u32 sx_debug_1;
        u32 smx_dc_ctl0;
        u32 cgts_sm_ctrl_reg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        u32 disabled_rb_mask;
        int i, j;

        /* per-family fixed limits; Aruba (TN APUs) are further refined
         * below by PCI device id. */
        switch (rdev->family) {
        case CHIP_CAYMAN:
                rdev->config.cayman.max_shader_engines = 2;
                rdev->config.cayman.max_pipes_per_simd = 4;
                rdev->config.cayman.max_tile_pipes = 8;
                rdev->config.cayman.max_simds_per_se = 12;
                rdev->config.cayman.max_backends_per_se = 4;
                rdev->config.cayman.max_texture_channel_caches = 8;
                rdev->config.cayman.max_gprs = 256;
                rdev->config.cayman.max_threads = 256;
                rdev->config.cayman.max_gs_threads = 32;
                rdev->config.cayman.max_stack_entries = 512;
                rdev->config.cayman.sx_num_of_sets = 8;
                rdev->config.cayman.sx_max_export_size = 256;
                rdev->config.cayman.sx_max_export_pos_size = 64;
                rdev->config.cayman.sx_max_export_smx_size = 192;
                rdev->config.cayman.max_hw_contexts = 8;
                rdev->config.cayman.sq_num_cf_insts = 2;

                rdev->config.cayman.sc_prim_fifo_size = 0x100;
                rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_ARUBA:
        default:
                rdev->config.cayman.max_shader_engines = 1;
                rdev->config.cayman.max_pipes_per_simd = 4;
                rdev->config.cayman.max_tile_pipes = 2;
                /* SIMD/backend counts vary by TN SKU (PCI device id) */
                if ((rdev->ddev->pci_device == 0x9900) ||
                    (rdev->ddev->pci_device == 0x9901) ||
                    (rdev->ddev->pci_device == 0x9905) ||
                    (rdev->ddev->pci_device == 0x9906) ||
                    (rdev->ddev->pci_device == 0x9907) ||
                    (rdev->ddev->pci_device == 0x9908) ||
                    (rdev->ddev->pci_device == 0x9909) ||
                    (rdev->ddev->pci_device == 0x990B) ||
                    (rdev->ddev->pci_device == 0x990C) ||
                    (rdev->ddev->pci_device == 0x990F) ||
                    (rdev->ddev->pci_device == 0x9910) ||
                    (rdev->ddev->pci_device == 0x9917) ||
                    (rdev->ddev->pci_device == 0x9999) ||
                    (rdev->ddev->pci_device == 0x999C)) {
                        rdev->config.cayman.max_simds_per_se = 6;
                        rdev->config.cayman.max_backends_per_se = 2;
                } else if ((rdev->ddev->pci_device == 0x9903) ||
                           (rdev->ddev->pci_device == 0x9904) ||
                           (rdev->ddev->pci_device == 0x990A) ||
                           (rdev->ddev->pci_device == 0x990D) ||
                           (rdev->ddev->pci_device == 0x990E) ||
                           (rdev->ddev->pci_device == 0x9913) ||
                           (rdev->ddev->pci_device == 0x9918) ||
                           (rdev->ddev->pci_device == 0x999D)) {
                        rdev->config.cayman.max_simds_per_se = 4;
                        rdev->config.cayman.max_backends_per_se = 2;
                } else if ((rdev->ddev->pci_device == 0x9919) ||
                           (rdev->ddev->pci_device == 0x9990) ||
                           (rdev->ddev->pci_device == 0x9991) ||
                           (rdev->ddev->pci_device == 0x9994) ||
                           (rdev->ddev->pci_device == 0x9995) ||
                           (rdev->ddev->pci_device == 0x9996) ||
                           (rdev->ddev->pci_device == 0x999A) ||
                           (rdev->ddev->pci_device == 0x99A0)) {
                        rdev->config.cayman.max_simds_per_se = 3;
                        rdev->config.cayman.max_backends_per_se = 1;
                } else {
                        rdev->config.cayman.max_simds_per_se = 2;
                        rdev->config.cayman.max_backends_per_se = 1;
                }
                rdev->config.cayman.max_texture_channel_caches = 2;
                rdev->config.cayman.max_gprs = 256;
                rdev->config.cayman.max_threads = 256;
                rdev->config.cayman.max_gs_threads = 32;
                rdev->config.cayman.max_stack_entries = 512;
                rdev->config.cayman.sx_num_of_sets = 8;
                rdev->config.cayman.sx_max_export_size = 256;
                rdev->config.cayman.sx_max_export_pos_size = 64;
                rdev->config.cayman.sx_max_export_smx_size = 192;
                rdev->config.cayman.max_hw_contexts = 8;
                rdev->config.cayman.sq_num_cf_insts = 2;

                rdev->config.cayman.sc_prim_fifo_size = 0x40;
                rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

        evergreen_fix_pci_max_read_req_size(rdev);

        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        /* derive DRAM row size from the column count, capped at 4KB */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cayman.mem_row_size_in_kb > 4)
                rdev->config.cayman.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cayman.shader_engine_tile_size = 32;
        rdev->config.cayman.num_gpus = 1;
        rdev->config.cayman.multi_gpu_tile_size = 64;

        /* unpack the golden gb_addr_config fields into the config struct
         * (these overwrite the provisional values set above) */
        tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
        rdev->config.cayman.num_tile_pipes = (1 << tmp);
        tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
        rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
        tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
        rdev->config.cayman.num_shader_engines = tmp + 1;
        tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
        rdev->config.cayman.num_gpus = tmp + 1;
        tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
        rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
        tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
        rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;


        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cayman.tile_config = 0;
        switch (rdev->config.cayman.num_tile_pipes) {
        case 1:
        default:
                rdev->config.cayman.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cayman.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cayman.tile_config |= (2 << 0);
                break;
        case 8:
                rdev->config.cayman.tile_config |= (3 << 0);
                break;
        }

        /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
        if (rdev->flags & RADEON_IS_IGP)
                rdev->config.cayman.tile_config |= 1 << 4;
        else {
                switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
                case 0: /* four banks */
                        rdev->config.cayman.tile_config |= 0 << 4;
                        break;
                case 1: /* eight banks */
                        rdev->config.cayman.tile_config |= 1 << 4;
                        break;
                case 2: /* sixteen banks */
                default:
                        rdev->config.cayman.tile_config |= 2 << 4;
                        break;
                }
        }
        rdev->config.cayman.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cayman.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* gather the per-SE render-backend disable bitmaps, 4 bits per SE */
        tmp = 0;
        for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
                u32 rb_disable_bitmap;

                WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
                WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
                rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
                tmp <<= 4;
                tmp |= rb_disable_bitmap;
        }
        /* enabled rb are just the one not disabled :) */
        disabled_rb_mask = tmp;
        tmp = 0;
        for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
                tmp |= (1 << i);
        /* if all the backends are disabled, fix it up here */
        if ((disabled_rb_mask & tmp) == tmp) {
                for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
                        disabled_rb_mask &= ~(1 << i);
        }

        /* restore broadcast addressing after the per-SE reads above */
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

        /* propagate the address config to every block that needs it */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
        if (ASIC_IS_DCE6(rdev))
                WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

        if ((rdev->config.cayman.max_backends_per_se == 1) &&
            (rdev->flags & RADEON_IS_IGP)) {
                if ((disabled_rb_mask & 3) == 1) {
                        /* RB0 disabled, RB1 enabled */
                        tmp = 0x11111111;
                } else {
                        /* RB1 disabled, RB0 enabled */
                        tmp = 0x00000000;
                }
        } else {
                tmp = gb_addr_config & NUM_PIPES_MASK;
                tmp = r6xx_remap_render_backend(rdev, tmp,
                                                rdev->config.cayman.max_backends_per_se *
                                                rdev->config.cayman.max_shader_engines,
                                                CAYMAN_MAX_BACKENDS, disabled_rb_mask);
        }
        WREG32(GB_BACKEND_MAP, tmp);

        /* enable only the texture channel caches the ASIC actually has */
        cgts_tcc_disable = 0xffff0000;
        for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
                cgts_tcc_disable &= ~(1 << (16 + i));
        WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
        WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
        WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
        WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

        /* reprogram the shader complex */
        cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
        for (i = 0; i < 16; i++)
                WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
        WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        sx_debug_1 = RREG32(SX_DEBUG_1);
        sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
        WREG32(SX_DEBUG_1, sx_debug_1);

        smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
        smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
        smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
        WREG32(SMX_DC_CTL0, smx_dc_ctl0);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

        /* need to be explicitly zero-ed */
        WREG32(VGT_OFFCHIP_LDS_BASE, 0);
        WREG32(SQ_LSTMP_RING_BASE, 0);
        WREG32(SQ_HSTMP_RING_BASE, 0);
        WREG32(SQ_ESTMP_RING_BASE, 0);
        WREG32(SQ_GSTMP_RING_BASE, 0);
        WREG32(SQ_VSTMP_RING_BASE, 0);
        WREG32(SQ_PSTMP_RING_BASE, 0);

        WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

        /* export buffer sizes are programmed in units of 4 entries */
        WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
                                        POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
                                        SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

        WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));


        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
                                  FETCH_FIFO_HIWATER(0x4) |
                                  DONE_FIFO_HIWATER(0xe0) |
                                  ALU_UPDATE_FIFO_HIWATER(0x8)));

        WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
        WREG32(SQ_CONFIG, (VC_ENABLE |
                           EXPORT_SRC_C |
                           GFX_PRIO(0) |
                           CS1_PRIO(0) |
                           CS2_PRIO(1)));
        WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        /* zero the CB performance counter selects */
        WREG32(CB_PERF_CTR0_SEL_0, 0);
        WREG32(CB_PERF_CTR0_SEL_1, 0);
        WREG32(CB_PERF_CTR1_SEL_0, 0);
        WREG32(CB_PERF_CTR1_SEL_1, 0);
        WREG32(CB_PERF_CTR2_SEL_0, 0);
        WREG32(CB_PERF_CTR2_SEL_1, 0);
        WREG32(CB_PERF_CTR3_SEL_0, 0);
        WREG32(CB_PERF_CTR3_SEL_1, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* read-modify-write as a posting/settle of HDP config */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

        DRM_UDELAY(50);

        /* set clockgating golden values on TN */
        if (rdev->family == CHIP_ARUBA) {
                tmp = RREG32_CG(CG_CGTT_LOCAL_0);
                tmp &= ~0x00380000;
                WREG32_CG(CG_CGTT_LOCAL_0, tmp);
                tmp = RREG32_CG(CG_CGTT_LOCAL_1);
                tmp &= ~0x0e000000;
                WREG32_CG(CG_CGTT_LOCAL_1, tmp);
        }
}
1191
1192 /*
1193  * GART
1194  */
/*
 * cayman_pcie_gart_tlb_flush - flush the HDP cache and invalidate the
 * TLBs of all eight VM contexts.
 *
 * @rdev: radeon_device pointer
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        /* flush hdp cache */
        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

        /* bits 0-7 are the VM contexts0-7 */
        WREG32(VM_INVALIDATE_REQUEST, 1);
}
1203
/*
 * cayman_pcie_gart_enable - pin the GART table and program the VM hardware
 *
 * @rdev: radeon_device pointer
 *
 * Pins the page table in VRAM, configures the L1 TLB and L2 cache,
 * sets up VM context 0 for the GART aperture and contexts 1-7 for
 * per-process VMs, then flushes the TLBs.
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
        int i, r;

        if (rdev->gart.robj == NULL) {
                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = radeon_gart_table_vram_pin(rdev);
        if (r)
                return r;
        radeon_gart_restore(rdev);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL,
               (0xA << 7) |
               ENABLE_L1_TLB |
               ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               ENABLE_ADVANCED_DRIVER_MODEL |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        /* setup context0 - covers the GTT aperture; faults fall back to the
         * dummy page */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT0_CNTL2, 0);
        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

        /* NOTE(review): magic registers, zeroed as part of VM setup --
         * purpose not documented here */
        WREG32(0x15D4, 0);
        WREG32(0x15D8, 0);
        WREG32(0x15DC, 0);

        /* empty context1-7 */
        /* Assign the pt base to something valid for now; the pts used for
         * the VMs are determined by the application and setup and assigned
         * on the fly in the vm part of radeon_gart.c
         */
        for (i = 1; i < 8; i++) {
                WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
                WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
                WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                        rdev->gart.table_addr >> 12);
        }

        /* enable context1-7 */
        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

        cayman_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(rdev->mc.gtt_size >> 20),
                 (unsigned long long)rdev->gart.table_addr);
        rdev->gart.ready = true;
        return 0;
}
1284
/*
 * cayman_pcie_gart_disable - turn off the VM hardware and unpin the
 * GART page table.
 *
 * @rdev: radeon_device pointer
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control - note ENABLE_L1_TLB is deliberately absent */
        WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache - ENABLE_L2_CACHE deliberately absent */
        WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        radeon_gart_table_vram_unpin(rdev);
}
1304
/*
 * cayman_pcie_gart_fini - tear down the GART: disable the VM hardware,
 * free the page table VRAM and release the gart structures.
 *
 * @rdev: radeon_device pointer
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
        cayman_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
1311
/*
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one of the CP rings
 *
 * @rdev: radeon_device pointer
 * @ring: ring index (0-2) selected via the low bits of SRBM_GFX_CNTL
 * @cp_int_cntl: value to write to CP_INT_CNTL for that ring
 *
 * The low two bits of SRBM_GFX_CNTL select which ring instance the
 * following CP register access targets.
 */
void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
                              int ring, u32 cp_int_cntl)
{
        u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

        WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
        WREG32(CP_INT_CNTL, cp_int_cntl);
}
1320
1321 /*
1322  * CP.
1323  */
/*
 * cayman_fence_ring_emit - emit a fence on the CP ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the TC/SH read caches for the GART, then emits an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to the
 * fence address and raises an interrupt.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
                            struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
        radeon_ring_write(ring, addr & 0xffffffff);
        /* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = interrupt on write */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
1347
/*
 * cayman_ring_ib_execute - schedule an indirect buffer on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Forces DX10/11 mode, optionally records the next read pointer in the
 * ring's scratch register, emits the INDIRECT_BUFFER packet tagged with
 * the IB's VM id, then flushes the read caches for that vmid.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        /* set to DX10/11 mode */
        radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(ring, 1);

        if (ring->rptr_save_reg) {
                /* 3 + 4 + 8 = dwords emitted below after this 3-dword write */
                uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
                radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(ring, ((ring->rptr_save_reg - 
                                          PACKET3_SET_CONFIG_REG_START) >> 2));
                radeon_ring_write(ring, next_rptr);
        }

        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
        /* vm id (if any) goes in bits 31:24 of the length dword */
        radeon_ring_write(ring, ib->length_dw | 
                          (ib->vm ? (ib->vm->id << 24) : 0));

        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
}
1384
1385 void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
1386                                struct radeon_ring *ring,
1387                                struct radeon_semaphore *semaphore,
1388                                bool emit_wait)
1389 {
1390         uint64_t addr = semaphore->gpu_addr;
1391
1392         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
1393         radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
1394
1395         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
1396         radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
1397
1398         radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
1399         radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
1400 }
1401
1402 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1403 {
1404         if (enable)
1405                 WREG32(CP_ME_CNTL, 0);
1406         else {
1407                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1408                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1409                 WREG32(SCRATCH_UMSK, 0);
1410                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1411         }
1412 }
1413
/*
 * cayman_cp_load_microcode - load the PFP and ME ucode into the CP
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and writes the big-endian firmware words into the
 * PFP and ME ucode RAMs.  Returns 0 on success, -EINVAL if the
 * firmware images have not been loaded.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i;

        if (!rdev->me_fw || !rdev->pfp_fw)
                return -EINVAL;

        /* the CP must be halted while its ucode RAM is written */
        cayman_cp_enable(rdev, false);

        /* PFP ucode: stored big-endian in the firmware blob */
        fw_data = (const __be32 *)rdev->pfp_fw->data;
        WREG32(CP_PFP_UCODE_ADDR, 0);
        for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
                WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
        WREG32(CP_PFP_UCODE_ADDR, 0);

        /* ME ucode */
        fw_data = (const __be32 *)rdev->me_fw->data;
        WREG32(CP_ME_RAM_WADDR, 0);
        for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
                WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

        /* reset the address/read/write pointers */
        WREG32(CP_PFP_UCODE_ADDR, 0);
        WREG32(CP_ME_RAM_WADDR, 0);
        WREG32(CP_ME_RAM_RADDR, 0);
        return 0;
}
1440
/*
 * cayman_cp_start - initialize the CP and emit the clear state
 *
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE on the GFX ring, enables the CP, then streams the
 * cayman_default_state clear-context sequence plus a few fixed setup
 * packets.  Returns 0 on success, negative error code on ring lock
 * failure.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        r = radeon_ring_lock(rdev, ring, 7);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }
        radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
        radeon_ring_write(ring, 0x1);
        radeon_ring_write(ring, 0x0);
        radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
        radeon_ring_unlock_commit(rdev, ring);

        cayman_cp_enable(rdev, true);

        /* +19 covers the fixed packets emitted around the default state */
        r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        for (i = 0; i < cayman_default_size; i++)
                radeon_ring_write(ring, cayman_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        /* SQ_VTX_BASE_VTX_LOC */
        radeon_ring_write(ring, 0xc0026f00);
        radeon_ring_write(ring, 0x00000000);
        radeon_ring_write(ring, 0x00000000);
        radeon_ring_write(ring, 0x00000000);

        /* Clear consts */
        radeon_ring_write(ring, 0xc0036f00);
        radeon_ring_write(ring, 0x00000bc4);
        radeon_ring_write(ring, 0xffffffff);
        radeon_ring_write(ring, 0xffffffff);
        radeon_ring_write(ring, 0xffffffff);

        radeon_ring_write(ring, 0xc0026900);
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /*  */

        radeon_ring_unlock_commit(rdev, ring);

        /* XXX init other rings */

        return 0;
}
1506
/*
 * cayman_cp_fini - halt the CP and tear down the GFX ring
 *
 * @rdev: radeon_device pointer
 *
 * Also frees the scratch register used for read-pointer writeback.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        cayman_cp_enable(rdev, false);
        radeon_ring_fini(rdev, ring);
        radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1514
/* cayman_cp_resume - bring up the three CP ring buffers (CP0/CP1/CP2).
 *
 * Soft-resets the CP block, programs each ring's size, read-pointer
 * writeback address and base address, zeroes the read/write pointers,
 * then starts the CP via cayman_cp_start().  Only ring 0 is ring-tested;
 * CP1/CP2 are left marked not ready.
 * Returns 0 on success, negative error code on ring test failure.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register tables, indexed 0..2 in step with ridx[] */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	DRM_MDELAY(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size (log2 of size in qwords) */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = drm_order(ring->ring_size / 8);
		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers;
		 * RB_RPTR_WR_ENA is held only while forcing rptr to 0 */
		ring = &rdev->ring[ridx[i]];
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		DRM_MDELAY(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only tests cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1623
1624 /*
1625  * DMA
1626  * Starting with R600, the GPU has an asynchronous
1627  * DMA engine.  The programming model is very similar
1628  * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
1631  * It supports copying data, writing embedded data,
1632  * solid fills, and a number of other things.  It also
1633  * has support for tiling/detiling of buffers.
1634  * Cayman and newer support two asynchronous DMA engines.
1635  */
1636 /**
1637  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1638  *
1639  * @rdev: radeon_device pointer
1640  * @ib: IB object to schedule
1641  *
1642  * Schedule an IB in the DMA ring (cayman-SI).
1643  */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		/* Precompute the wptr value the ring will have once the IB
		 * packet below has been written: skip the 4-dword WRITE
		 * packet emitted here, pad up to the (wptr & 7) == 5
		 * alignment the IB packet needs, then add the 3-dword IB
		 * packet itself.  That value is written back so the rptr
		 * shadow reflects completion of this submission. */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	/* IB address must be 32-byte aligned; length goes in the high bits */
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}
1670
1671 /**
1672  * cayman_dma_stop - stop the async dma engines
1673  *
1674  * @rdev: radeon_device pointer
1675  *
1676  * Stop the async dma engines (cayman-SI).
1677  */
1678 void cayman_dma_stop(struct radeon_device *rdev)
1679 {
1680         u32 rb_cntl;
1681
1682         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1683
1684         /* dma0 */
1685         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1686         rb_cntl &= ~DMA_RB_ENABLE;
1687         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1688
1689         /* dma1 */
1690         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1691         rb_cntl &= ~DMA_RB_ENABLE;
1692         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1693
1694         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1695         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1696 }
1697
1698 /**
1699  * cayman_dma_resume - setup and start the async dma engines
1700  *
1701  * @rdev: radeon_device pointer
1702  *
1703  * Set up the DMA ring buffers and enable them. (cayman-SI).
1704  * Returns 0 for success, error for failure.
1705  */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);
	DRM_UDELAY(50);
	WREG32(SRBM_SOFT_RESET, 0);

	/* bring up both engines: i == 0 -> dma0, i == 1 -> dma1 */
	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		/* disable semaphore timeouts */
		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		/* mask the context-empty interrupt */
		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		/* enable the ring only after everything is programmed */
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
1788
1789 /**
1790  * cayman_dma_fini - tear down the async dma engines
1791  *
1792  * @rdev: radeon_device pointer
1793  *
1794  * Stop the async dma engines and free the rings (cayman-SI).
1795  */
1796 void cayman_dma_fini(struct radeon_device *rdev)
1797 {
1798         cayman_dma_stop(rdev);
1799         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1800         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1801 }
1802
/* cayman_gpu_check_soft_reset - build a mask of blocks that appear hung.
 *
 * Samples the GRBM/SRBM/DMA/VM status registers and accumulates the
 * corresponding RADEON_RESET_* bits for every block reporting busy.
 * MC busy is deliberately dropped from the final mask (see below).
 * Returns the reset mask (0 means nothing looks hung).
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1880
/* cayman_gpu_soft_reset - soft-reset the blocks named in @reset_mask.
 *
 * Halts the CP and any DMA engine being reset, stops the MC, translates
 * the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits, pulses those
 * reset registers, then restores the MC.  No-op when @reset_mask is 0.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump fault state to aid post-mortem debugging */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	DRM_UDELAY(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM / SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* the MC is only soft-reset on discrete parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse GRBM reset: assert, wait, deassert (reads follow writes) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		DRM_UDELAY(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse SRBM reset the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		DRM_UDELAY(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	DRM_UDELAY(50);

	evergreen_mc_resume(rdev, &save);
	DRM_UDELAY(50);

	evergreen_print_gpu_status_regs(rdev);
}
2012
2013 int cayman_asic_reset(struct radeon_device *rdev)
2014 {
2015         u32 reset_mask;
2016
2017         reset_mask = cayman_gpu_check_soft_reset(rdev);
2018
2019         if (reset_mask)
2020                 r600_set_bios_scratch_engine_hung(rdev, true);
2021
2022         cayman_gpu_soft_reset(rdev, reset_mask);
2023
2024         reset_mask = cayman_gpu_check_soft_reset(rdev);
2025
2026         if (!reset_mask)
2027                 r600_set_bios_scratch_engine_hung(rdev, false);
2028
2029         return 0;
2030 }
2031
2032 /**
2033  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
2034  *
2035  * @rdev: radeon_device pointer
2036  * @ring: radeon_ring structure holding ring information
2037  *
2038  * Check if the GFX engine is locked up.
2039  * Returns true if the engine appears to be locked up, false if not.
2040  */
2041 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2042 {
2043         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2044
2045         if (!(reset_mask & (RADEON_RESET_GFX |
2046                             RADEON_RESET_COMPUTE |
2047                             RADEON_RESET_CP))) {
2048                 radeon_ring_lockup_update(ring);
2049                 return false;
2050         }
2051         /* force CP activities */
2052         radeon_ring_force_activity(rdev, ring);
2053         return radeon_ring_test_lockup(rdev, ring);
2054 }
2055
2056 /**
2057  * cayman_dma_is_lockup - Check if the DMA engine is locked up
2058  *
2059  * @rdev: radeon_device pointer
2060  * @ring: radeon_ring structure holding ring information
2061  *
2062  * Check if the async DMA engine is locked up.
2063  * Returns true if the engine appears to be locked up, false if not.
2064  */
2065 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2066 {
2067         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2068         u32 mask;
2069
2070         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2071                 mask = RADEON_RESET_DMA;
2072         else
2073                 mask = RADEON_RESET_DMA1;
2074
2075         if (!(reset_mask & mask)) {
2076                 radeon_ring_lockup_update(ring);
2077                 return false;
2078         }
2079         /* force ring activities */
2080         radeon_ring_force_activity(rdev, ring);
2081         return radeon_ring_test_lockup(rdev, ring);
2082 }
2083
/* cayman_startup - full asic bring-up used by both init and resume.
 *
 * Loads microcode, enables GART, initializes the GPU, allocates RLC/WB
 * buffers, starts fence rings, installs interrupts, and brings up the
 * CP, DMA and (if available) UVD rings.  Ordering of these steps is
 * significant.  Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);
	/* enable aspm */
	evergreen_program_aspm(rdev);

	evergreen_mc_program(rdev);

	/* IGP parts have no MC firmware; discrete parts load it here */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* blitter failure is non-fatal; copies fall back to memcpy */
	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy.copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size = tn_rlc_save_restore_register_list_size;
		rdev->rlc.cs_data = cayman_cs_data;
		r = sumo_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD failure only disables the UVD ring; startup continues */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* initialize the gfx, dma0 and dma1 rings */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled earlier in this function */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2271
2272 int cayman_resume(struct radeon_device *rdev)
2273 {
2274         int r;
2275
2276         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2277          * posting will perform necessary task to bring back GPU into good
2278          * shape.
2279          */
2280         /* post card */
2281         atom_asic_init(rdev->mode_info.atom_context);
2282
2283         /* init golden registers */
2284         ni_init_golden_registers(rdev);
2285
2286         rdev->accel_working = true;
2287         r = cayman_startup(rdev);
2288         if (r) {
2289                 DRM_ERROR("cayman startup failed on resume\n");
2290                 rdev->accel_working = false;
2291                 return r;
2292         }
2293         return r;
2294 }
2295
/* cayman_suspend - quiesce the asic for suspend.
 *
 * Tears down in roughly the reverse order of cayman_startup(): audio,
 * VM manager, CP, DMA, UVD, interrupts, writeback, then GART.  The
 * teardown ordering matters.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2309
2310 /* Plan is to move initialization in that function and use
2311  * helper function so that radeon_device_init pretty much
2312  * do nothing more than calling asic specific function. This
2313  * should also allow to remove a bunch of callback function
2314  * like vram_info.
2315  */
/**
 * cayman_init - asic-specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * One-time setup for cayman/TN parts: reads and validates the BIOS,
 * posts the card if needed, initializes clocks, fences, the memory
 * controller, the buffer manager, the rings and the GART, then calls
 * cayman_startup() to bring the hw up.  A startup failure disables
 * acceleration but is not fatal (modesetting can still work).
 * Returns 0 on success, negative error code on failure.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up the software state for the GFX and the two DMA rings */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is best-effort: only set up its ring if init succeeded */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* startup failed: unwind everything it set up and
		 * continue without acceleration */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2421
/**
 * cayman_fini - asic-specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Full teardown counterpart of cayman_init(): stops the engines,
 * frees rings, interrupt state, VM/IB pools, UVD, GART, memory
 * manager state and the cached BIOS image.  The order is the reverse
 * of initialization and must not be changed.
 */
void cayman_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	ni_fini_microcode(rdev);
	drm_free(rdev->bios, M_DRM);
	rdev->bios = NULL;
}
2446
2447 /*
2448  * vm
2449  */
2450 int cayman_vm_init(struct radeon_device *rdev)
2451 {
2452         /* number of VMs */
2453         rdev->vm_manager.nvm = 8;
2454         /* base offset of vram pages */
2455         if (rdev->flags & RADEON_IS_IGP) {
2456                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2457                 tmp <<= 22;
2458                 rdev->vm_manager.vram_base_offset = tmp;
2459         } else
2460                 rdev->vm_manager.vram_base_offset = 0;
2461         return 0;
2462 }
2463
/**
 * cayman_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman_vm_init() allocates no resources,
 * so there is nothing to release.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2467
2468 /**
2469  * cayman_vm_decode_fault - print human readable fault info
2470  *
2471  * @rdev: radeon_device pointer
2472  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2473  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2474  *
2475  * Print human readable fault information (cayman/TN).
2476  */
2477 void cayman_vm_decode_fault(struct radeon_device *rdev,
2478                             u32 status, u32 addr)
2479 {
2480         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2481         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2482         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2483         char *block;
2484
2485         switch (mc_id) {
2486         case 32:
2487         case 16:
2488         case 96:
2489         case 80:
2490         case 160:
2491         case 144:
2492         case 224:
2493         case 208:
2494                 block = "CB";
2495                 break;
2496         case 33:
2497         case 17:
2498         case 97:
2499         case 81:
2500         case 161:
2501         case 145:
2502         case 225:
2503         case 209:
2504                 block = "CB_FMASK";
2505                 break;
2506         case 34:
2507         case 18:
2508         case 98:
2509         case 82:
2510         case 162:
2511         case 146:
2512         case 226:
2513         case 210:
2514                 block = "CB_CMASK";
2515                 break;
2516         case 35:
2517         case 19:
2518         case 99:
2519         case 83:
2520         case 163:
2521         case 147:
2522         case 227:
2523         case 211:
2524                 block = "CB_IMMED";
2525                 break;
2526         case 36:
2527         case 20:
2528         case 100:
2529         case 84:
2530         case 164:
2531         case 148:
2532         case 228:
2533         case 212:
2534                 block = "DB";
2535                 break;
2536         case 37:
2537         case 21:
2538         case 101:
2539         case 85:
2540         case 165:
2541         case 149:
2542         case 229:
2543         case 213:
2544                 block = "DB_HTILE";
2545                 break;
2546         case 38:
2547         case 22:
2548         case 102:
2549         case 86:
2550         case 166:
2551         case 150:
2552         case 230:
2553         case 214:
2554                 block = "SX";
2555                 break;
2556         case 39:
2557         case 23:
2558         case 103:
2559         case 87:
2560         case 167:
2561         case 151:
2562         case 231:
2563         case 215:
2564                 block = "DB_STEN";
2565                 break;
2566         case 40:
2567         case 24:
2568         case 104:
2569         case 88:
2570         case 232:
2571         case 216:
2572         case 168:
2573         case 152:
2574                 block = "TC_TFETCH";
2575                 break;
2576         case 41:
2577         case 25:
2578         case 105:
2579         case 89:
2580         case 233:
2581         case 217:
2582         case 169:
2583         case 153:
2584                 block = "TC_VFETCH";
2585                 break;
2586         case 42:
2587         case 26:
2588         case 106:
2589         case 90:
2590         case 234:
2591         case 218:
2592         case 170:
2593         case 154:
2594                 block = "VC";
2595                 break;
2596         case 112:
2597                 block = "CP";
2598                 break;
2599         case 113:
2600         case 114:
2601                 block = "SH";
2602                 break;
2603         case 115:
2604                 block = "VGT";
2605                 break;
2606         case 178:
2607                 block = "IH";
2608                 break;
2609         case 51:
2610                 block = "RLC";
2611                 break;
2612         case 55:
2613                 block = "DMA";
2614                 break;
2615         case 56:
2616                 block = "HDP";
2617                 break;
2618         default:
2619                 block = "unknown";
2620                 break;
2621         }
2622
2623         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2624                protections, vmid, addr,
2625                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2626                block, mc_id);
2627 }
2628
2629 #define R600_ENTRY_VALID   (1 << 0)
2630 #define R600_PTE_SYSTEM    (1 << 1)
2631 #define R600_PTE_SNOOPED   (1 << 2)
2632 #define R600_PTE_READABLE  (1 << 5)
2633 #define R600_PTE_WRITEABLE (1 << 6)
2634
2635 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2636 {
2637         uint32_t r600_flags = 0;
2638         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2639         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2640         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2641         if (flags & RADEON_VM_PAGE_SYSTEM) {
2642                 r600_flags |= R600_PTE_SYSTEM;
2643                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2644         }
2645         return r600_flags;
2646 }
2647
2648 /**
2649  * cayman_vm_set_page - update the page tables using the CP
2650  *
2651  * @rdev: radeon_device pointer
2652  * @ib: indirect buffer to fill with commands
2653  * @pe: addr of the page entry
2654  * @addr: dst addr to write into pe
2655  * @count: number of page entries to update
2656  * @incr: increase next addr by incr bytes
2657  * @flags: access flags
2658  *
2659  * Update the page tables using the CP (cayman/TN).
2660  */
void cayman_vm_set_page(struct radeon_device *rdev,
			struct radeon_ib *ib,
			uint64_t pe,
			uint64_t addr, unsigned count,
			uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: emit ME_WRITE packets, capped at 0x3FFF dwords
		 * each; every PTE takes two dwords (lo/hi). */
		while (count) {
			ndw = 1 + count * 2;
			if (ndw > 0x3FFF)
				ndw = 0x3FFF;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
			ib->ptr[ib->length_dw++] = pe;
			/* destination address is 40 bits wide */
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages go through the GART;
					 * keep only the page-aligned address */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA path */
		if ((flags & RADEON_VM_PAGE_SYSTEM) ||
		    (count == 1)) {
			/* each PTE must be written individually */
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
			/* pad the IB to a multiple of 8 dwords with NOPs */
			while (ib->length_dw & 0x7)
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
		} else {
			/* contiguous vram: let the DMA engine generate the
			 * PTE sequence from base value + increment */
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad the IB to a multiple of 8 dwords with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
	}
}
2753
2754 /**
2755  * cayman_vm_flush - vm flush using the CP
2756  *
2757  * @rdev: radeon_device pointer
2758  *
2759  * Update the page table base and flush the VM TLB
2760  * using the CP (cayman-si).
2761  */
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	/* nothing to flush without a VM */
	if (vm == NULL)
		return;

	/* point the per-VM page table base at the new directory */
	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
	radeon_ring_write(ring, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
2784
/**
 * cayman_dma_vm_flush - vm flush using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Updates the page table base and flushes the VM TLB via
 * SRBM_WRITE packets on the DMA ring (register offsets are
 * dword-addressed, hence the >> 2).
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point the per-VM page table base at the new directory */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
2806