radeon: sync to radeon 3.10
[dragonfly.git] / sys / dev / drm / radeon / ni.c
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  * $FreeBSD: head/sys/dev/drm2/radeon/ni.c 254885 2013-08-25 19:37:15Z dumbbell $
24  */
25
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "nid.h"
31 #include "atom.h"
32 #include "ni_reg.h"
33 #include "cayman_blit_shaders.h"
34
35 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
36 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
37 extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
38
39 #define EVERGREEN_PFP_UCODE_SIZE 1120
40 #define EVERGREEN_PM4_UCODE_SIZE 1376
41 #define EVERGREEN_RLC_UCODE_SIZE 768
42 #define BTC_MC_UCODE_SIZE 6024
43
44 #define CAYMAN_PFP_UCODE_SIZE 2176
45 #define CAYMAN_PM4_UCODE_SIZE 2176
46 #define CAYMAN_RLC_UCODE_SIZE 1024
47 #define CAYMAN_MC_UCODE_SIZE 6037
48
49 #define ARUBA_RLC_UCODE_SIZE 1536
50
51
/*
 * "Golden" register tables: flat u32 lists handed to
 * radeon_program_register_sequence() (see ni_init_golden_registers()).
 * Each row appears to be { register offset, AND mask, OR value } --
 * confirm against radeon_program_register_sequence().
 */
static const u32 cayman_golden_registers2[] =
{
        0x3e5c, 0xffffffff, 0x00000000,
        0x3e48, 0xffffffff, 0x00000000,
        0x3e4c, 0xffffffff, 0x00000000,
        0x3e64, 0xffffffff, 0x00000000,
        0x3e50, 0xffffffff, 0x00000000,
        0x3e60, 0xffffffff, 0x00000000
};
61
/* Golden register settings applied for CHIP_CAYMAN (offset, mask, value). */
static const u32 cayman_golden_registers[] =
{
        0x5eb4, 0xffffffff, 0x00000002,
        0x5e78, 0x8f311ff1, 0x001000f0,
        0x3f90, 0xffff0000, 0xff000000,
        0x9148, 0xffff0000, 0xff000000,
        0x3f94, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0xc78, 0x00000080, 0x00000080,
        0xbd4, 0x70073777, 0x00011003,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd0b8, 0x73773777, 0x02011003,
        0x5bc0, 0x00200000, 0x50100000,
        0x98f8, 0x33773777, 0x02011003,
        0x98fc, 0xffffffff, 0x76541032,
        0x7030, 0x31000311, 0x00000011,
        0x2f48, 0x33773777, 0x42010001,
        0x6b28, 0x00000010, 0x00000012,
        0x7728, 0x00000010, 0x00000012,
        0x10328, 0x00000010, 0x00000012,
        0x10f28, 0x00000010, 0x00000012,
        0x11b28, 0x00000010, 0x00000012,
        0x12728, 0x00000010, 0x00000012,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x10c, 0x00000001, 0x00010003,
        0xa02c, 0xffffffff, 0x0000009b,
        0x913c, 0x0000010f, 0x01000100,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x28350, 0x00000f01, 0x00000000,
        0x9508, 0x3700001f, 0x00000002,
        0x960c, 0xffffffff, 0x54763210,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d0, 0xffffffff, 0x0f40df40,
        0x88d4, 0x0000001f, 0x00000010,
        0x8974, 0xffffffff, 0x00000000
};
102
/*
 * Second golden table shared by both ARUBA variants (dvst and scrapper);
 * see ni_init_golden_registers().  Rows are (offset, mask, value) like
 * the other golden tables.
 */
static const u32 dvst_golden_registers2[] =
{
        0x8f8, 0xffffffff, 0,
        0x8fc, 0x00380000, 0,
        0x8f8, 0xffffffff, 1,
        0x8fc, 0x0e000000, 0
};
110
/*
 * Golden register settings for the "dvst" subset of ARUBA PCI ids
 * (selected by device id in ni_init_golden_registers()).
 */
static const u32 dvst_golden_registers[] =
{
        0x690, 0x3fff3fff, 0x20c00033,
        0x918c, 0x0fff0fff, 0x00010006,
        0x91a8, 0x0fff0fff, 0x00010006,
        0x9150, 0xffffdfff, 0x6e944040,
        0x917c, 0x0fff0fff, 0x00030002,
        0x9198, 0x0fff0fff, 0x00030002,
        0x915c, 0x0fff0fff, 0x00010000,
        0x3f90, 0xffff0001, 0xff000000,
        0x9178, 0x0fff0fff, 0x00070000,
        0x9194, 0x0fff0fff, 0x00070000,
        0x9148, 0xffff0001, 0xff000000,
        0x9190, 0x0fff0fff, 0x00090008,
        0x91ac, 0x0fff0fff, 0x00090008,
        0x3f94, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0x929c, 0x00000fff, 0x00000001,
        0x55e4, 0xff607fff, 0xfc000100,
        0x8a18, 0xff000fff, 0x00000100,
        0x8b28, 0xff000fff, 0x00000100,
        0x9144, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffe, 0x00000000,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd0b8, 0x73773777, 0x12010001,
        0x5bb0, 0x000000f0, 0x00000070,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x00030000, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xa008, 0xffffffff, 0x00010000,
        0x913c, 0xffff03ff, 0x01000100,
        0x8c00, 0x000000ff, 0x00000003,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x8cf0, 0x1fff1fff, 0x08e00410,
        0x28350, 0x00000f01, 0x00000000,
        0x9508, 0xf700071f, 0x00000002,
        0x960c, 0xffffffff, 0x54763210,
        0x20ef8, 0x01ff01ff, 0x00000002,
        0x20e98, 0xfffffbff, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x8978, 0x3fffffff, 0x04050140,
        0x88d4, 0x0000001f, 0x00000010,
        0x8974, 0xffffffff, 0x00000000
};
168
/*
 * Golden register settings for the remaining ("scrapper") ARUBA PCI ids.
 * NOTE(review): most rows are written twice in a row; this duplication is
 * present in the table this sync was taken from -- presumably intentional
 * (double-write), TODO confirm before deduplicating.
 */
static const u32 scrapper_golden_registers[] =
{
        0x690, 0x3fff3fff, 0x20c00033,
        0x918c, 0x0fff0fff, 0x00010006,
        0x918c, 0x0fff0fff, 0x00010006,
        0x91a8, 0x0fff0fff, 0x00010006,
        0x91a8, 0x0fff0fff, 0x00010006,
        0x9150, 0xffffdfff, 0x6e944040,
        0x9150, 0xffffdfff, 0x6e944040,
        0x917c, 0x0fff0fff, 0x00030002,
        0x917c, 0x0fff0fff, 0x00030002,
        0x9198, 0x0fff0fff, 0x00030002,
        0x9198, 0x0fff0fff, 0x00030002,
        0x915c, 0x0fff0fff, 0x00010000,
        0x915c, 0x0fff0fff, 0x00010000,
        0x3f90, 0xffff0001, 0xff000000,
        0x3f90, 0xffff0001, 0xff000000,
        0x9178, 0x0fff0fff, 0x00070000,
        0x9178, 0x0fff0fff, 0x00070000,
        0x9194, 0x0fff0fff, 0x00070000,
        0x9194, 0x0fff0fff, 0x00070000,
        0x9148, 0xffff0001, 0xff000000,
        0x9148, 0xffff0001, 0xff000000,
        0x9190, 0x0fff0fff, 0x00090008,
        0x9190, 0x0fff0fff, 0x00090008,
        0x91ac, 0x0fff0fff, 0x00090008,
        0x91ac, 0x0fff0fff, 0x00090008,
        0x3f94, 0xffff0000, 0xff000000,
        0x3f94, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0x914c, 0xffff0000, 0xff000000,
        0x929c, 0x00000fff, 0x00000001,
        0x929c, 0x00000fff, 0x00000001,
        0x55e4, 0xff607fff, 0xfc000100,
        0x8a18, 0xff000fff, 0x00000100,
        0x8a18, 0xff000fff, 0x00000100,
        0x8b28, 0xff000fff, 0x00000100,
        0x8b28, 0xff000fff, 0x00000100,
        0x9144, 0xfffc0fff, 0x00000100,
        0x9144, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffe, 0x00000000,
        0x9838, 0xfffffffe, 0x00000000,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd02c, 0xbfffff1f, 0x08421000,
        0xd0b8, 0x73773777, 0x12010001,
        0xd0b8, 0x73773777, 0x12010001,
        0x5bb0, 0x000000f0, 0x00000070,
        0x98f8, 0x73773777, 0x12010001,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x00030000, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b24, 0x3fff3fff, 0x00ff0fff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x4d8, 0x00000fff, 0x00000100,
        0xa008, 0xffffffff, 0x00010000,
        0xa008, 0xffffffff, 0x00010000,
        0x913c, 0xffff03ff, 0x01000100,
        0x913c, 0xffff03ff, 0x01000100,
        0x90e8, 0x001fffff, 0x010400c0,
        0x8c00, 0x000000ff, 0x00000003,
        0x8c00, 0x000000ff, 0x00000003,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x8c04, 0xf8ff00ff, 0x40600060,
        0x8c30, 0x0000000f, 0x00040005,
        0x8cf0, 0x1fff1fff, 0x08e00410,
        0x8cf0, 0x1fff1fff, 0x08e00410,
        0x900c, 0x00ffffff, 0x0017071f,
        0x28350, 0x00000f01, 0x00000000,
        0x28350, 0x00000f01, 0x00000000,
        0x9508, 0xf700071f, 0x00000002,
        0x9508, 0xf700071f, 0x00000002,
        0x9688, 0x00300000, 0x0017000f,
        0x960c, 0xffffffff, 0x54763210,
        0x960c, 0xffffffff, 0x54763210,
        0x20ef8, 0x01ff01ff, 0x00000002,
        0x20e98, 0xfffffbff, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x8978, 0x3fffffff, 0x04050140,
        0x8978, 0x3fffffff, 0x04050140,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x8974, 0xffffffff, 0x00000000,
        0x8974, 0xffffffff, 0x00000000
};
275
276 static void ni_init_golden_registers(struct radeon_device *rdev)
277 {
278         switch (rdev->family) {
279         case CHIP_CAYMAN:
280                 radeon_program_register_sequence(rdev,
281                                                  cayman_golden_registers,
282                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
283                 radeon_program_register_sequence(rdev,
284                                                  cayman_golden_registers2,
285                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
286                 break;
287         case CHIP_ARUBA:
288                 if ((rdev->ddev->pci_device == 0x9900) ||
289                     (rdev->ddev->pci_device == 0x9901) ||
290                     (rdev->ddev->pci_device == 0x9903) ||
291                     (rdev->ddev->pci_device == 0x9904) ||
292                     (rdev->ddev->pci_device == 0x9905) ||
293                     (rdev->ddev->pci_device == 0x9906) ||
294                     (rdev->ddev->pci_device == 0x9907) ||
295                     (rdev->ddev->pci_device == 0x9908) ||
296                     (rdev->ddev->pci_device == 0x9909) ||
297                     (rdev->ddev->pci_device == 0x990A) ||
298                     (rdev->ddev->pci_device == 0x990B) ||
299                     (rdev->ddev->pci_device == 0x990C) ||
300                     (rdev->ddev->pci_device == 0x990D) ||
301                     (rdev->ddev->pci_device == 0x990E) ||
302                     (rdev->ddev->pci_device == 0x990F) ||
303                     (rdev->ddev->pci_device == 0x9910) ||
304                     (rdev->ddev->pci_device == 0x9913) ||
305                     (rdev->ddev->pci_device == 0x9917) ||
306                     (rdev->ddev->pci_device == 0x9918)) {
307                         radeon_program_register_sequence(rdev,
308                                                          dvst_golden_registers,
309                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
310                         radeon_program_register_sequence(rdev,
311                                                          dvst_golden_registers2,
312                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
313                 } else {
314                         radeon_program_register_sequence(rdev,
315                                                          scrapper_golden_registers,
316                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
317                         radeon_program_register_sequence(rdev,
318                                                          dvst_golden_registers2,
319                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
320                 }
321                 break;
322         default:
323                 break;
324         }
325 }
326
327 #define BTC_IO_MC_REGS_SIZE 29
328
/*
 * MC IO init tables: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA before the MC ucode is
 * streamed in (see ni_mc_load_microcode()).
 */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00946a00}
};
360
/* Turks MC IO init table; differs from barts only in the last data value. */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00936a00}
};
392
/* Caicos MC IO init table; differs from barts only in the last data value. */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00916a00}
};
424
/* Cayman MC IO init table; differs from barts only in the last data value. */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
        {0x00000077, 0xff010100},
        {0x00000078, 0x00000000},
        {0x00000079, 0x00001434},
        {0x0000007a, 0xcc08ec08},
        {0x0000007b, 0x00040000},
        {0x0000007c, 0x000080c0},
        {0x0000007d, 0x09000000},
        {0x0000007e, 0x00210404},
        {0x00000081, 0x08a8e800},
        {0x00000082, 0x00030444},
        {0x00000083, 0x00000000},
        {0x00000085, 0x00000001},
        {0x00000086, 0x00000002},
        {0x00000087, 0x48490000},
        {0x00000088, 0x20244647},
        {0x00000089, 0x00000005},
        {0x0000008b, 0x66030000},
        {0x0000008c, 0x00006603},
        {0x0000008d, 0x00000100},
        {0x0000008f, 0x00001c0a},
        {0x00000090, 0xff000001},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00976b00}
};
456
/**
 * ni_mc_load_microcode - load the memory-controller ucode into the MC engine
 *
 * @rdev: radeon_device pointer
 *
 * Picks the per-family MC IO register table and ucode size, programs the
 * IO registers via MC_SEQ_IO_DEBUG_INDEX/DATA, streams the MC ucode into
 * the sequencer, restarts it and polls for memory training completion.
 * Returns -EINVAL when no MC firmware image was fetched, 0 otherwise
 * (including when the load is skipped because the sequencer is already
 * running or the memory is not GDDR5).
 */
int ni_mc_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        u32 mem_type, running, blackout = 0;
        u32 *io_mc_regs;
        int i, ucode_size, regs_size;

        if (!rdev->mc_fw)
                return -EINVAL;

        /* CHIP_CAICOS deliberately doubles as the default case. */
        switch (rdev->family) {
        case CHIP_BARTS:
                io_mc_regs = (u32 *)&barts_io_mc_regs;
                ucode_size = BTC_MC_UCODE_SIZE;
                regs_size = BTC_IO_MC_REGS_SIZE;
                break;
        case CHIP_TURKS:
                io_mc_regs = (u32 *)&turks_io_mc_regs;
                ucode_size = BTC_MC_UCODE_SIZE;
                regs_size = BTC_IO_MC_REGS_SIZE;
                break;
        case CHIP_CAICOS:
        default:
                io_mc_regs = (u32 *)&caicos_io_mc_regs;
                ucode_size = BTC_MC_UCODE_SIZE;
                regs_size = BTC_IO_MC_REGS_SIZE;
                break;
        case CHIP_CAYMAN:
                io_mc_regs = (u32 *)&cayman_io_mc_regs;
                ucode_size = CAYMAN_MC_UCODE_SIZE;
                regs_size = BTC_IO_MC_REGS_SIZE;
                break;
        }

        mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
        running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

        /* only program GDDR5 parts whose MC sequencer is not already running */
        if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
                /*
                 * NOTE(review): 'running' is necessarily 0 here because of
                 * the guard above, so this blackout save/restore appears to
                 * be dead code; kept byte-for-byte to match the code this
                 * was synced from.
                 */
                if (running) {
                        blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
                        WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
                }

                /* reset the engine and set to writable */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

                /* load mc io regs: even slots are indexes, odd slots data */
                for (i = 0; i < regs_size; i++) {
                        WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
                        WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
                }
                /* load the MC ucode (big-endian words in the firmware image) */
                fw_data = (const __be32 *)rdev->mc_fw->data;
                for (i = 0; i < ucode_size; i++)
                        WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

                /* put the engine back into the active state */
                WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
                WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

                /* wait (up to usec_timeout us) for training to complete */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
                                break;
                        DRM_UDELAY(1);
                }

                if (running)
                        WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
        }

        /* NOTE(review): training timeout is not reported as an error */
        return 0;
}
532
/**
 * ni_init_microcode - fetch the pfp, me, rlc and mc firmware images
 *
 * @rdev: radeon_device pointer
 *
 * Selects per-family firmware names and expected sizes, fetches each
 * image with firmware_get() and validates its length.  On failure all
 * images acquired so far are released and an error is returned:
 * -ENOENT when an image cannot be found, -EINVAL on a length mismatch.
 * Panics on an unsupported family (programming error).
 */
int ni_init_microcode(struct radeon_device *rdev)
{
        const char *chip_name;
        const char *rlc_chip_name;
        size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
        char fw_name[30];
        int err;

        DRM_DEBUG("\n");

        switch (rdev->family) {
        case CHIP_BARTS:
                chip_name = "BARTS";
                rlc_chip_name = "BTC";
                pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
                me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
                rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
                mc_req_size = BTC_MC_UCODE_SIZE * 4;
                break;
        case CHIP_TURKS:
                chip_name = "TURKS";
                rlc_chip_name = "BTC";
                pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
                me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
                rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
                mc_req_size = BTC_MC_UCODE_SIZE * 4;
                break;
        case CHIP_CAICOS:
                chip_name = "CAICOS";
                rlc_chip_name = "BTC";
                pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
                me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
                rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
                mc_req_size = BTC_MC_UCODE_SIZE * 4;
                break;
        case CHIP_CAYMAN:
                chip_name = "CAYMAN";
                rlc_chip_name = "CAYMAN";
                pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
                me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
                rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
                mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
                break;
        case CHIP_ARUBA:
                chip_name = "ARUBA";
                rlc_chip_name = "ARUBA";
                /* pfp/me same size as CAYMAN */
                pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
                me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
                rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
                /* no MC ucode for ARUBA (IGP; see RADEON_IS_IGP check below) */
                mc_req_size = 0;
                break;
        default: panic("%s: Unsupported family %d", __func__, rdev->family);
        }

        DRM_INFO("Loading %s Microcode\n", chip_name);
        err = 0;

        /* pfp: a bad length aborts immediately */
        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
        rdev->pfp_fw = firmware_get(fw_name);
        if (rdev->pfp_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->pfp_fw->datasize != pfp_req_size) {
                DRM_ERROR(
                       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->pfp_fw->datasize, fw_name);
                err = -EINVAL;
                goto out;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
        rdev->me_fw = firmware_get(fw_name);
        if (rdev->me_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->me_fw->datasize != me_req_size) {
                DRM_ERROR(
                       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->me_fw->datasize, fw_name);
                /*
                 * NOTE(review): no goto here (unlike the pfp check above):
                 * err is carried forward, the remaining images are still
                 * fetched, and cleanup happens at 'out'.
                 */
                err = -EINVAL;
        }

        ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc",
                  rlc_chip_name);
        rdev->rlc_fw = firmware_get(fw_name);
        if (rdev->rlc_fw == NULL) {
                err = -ENOENT;
                goto out;
        }
        if (rdev->rlc_fw->datasize != rlc_req_size) {
                DRM_ERROR(
                       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
                       rdev->rlc_fw->datasize, fw_name);
                /* same fall-through-on-mismatch pattern as the me check */
                err = -EINVAL;
        }

        /* no MC ucode on TN */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc",
                          chip_name);
                rdev->mc_fw = firmware_get(fw_name);
                if (rdev->mc_fw == NULL) {
                        err = -ENOENT;
                        goto out;
                }
                if (rdev->mc_fw->datasize != mc_req_size) {
                        DRM_ERROR(
                               "ni_mc: Bogus length %zu in firmware \"%s\"\n",
                               rdev->mc_fw->datasize, fw_name);
                        err = -EINVAL;
                }
        }
out:
        /* unified error path: drop every image acquired so far */
        if (err) {
                if (err != -EINVAL)
                        DRM_ERROR(
                               "ni_cp: Failed to load firmware \"%s\"\n",
                               fw_name);
                if (rdev->pfp_fw != NULL) {
                        firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
                        rdev->pfp_fw = NULL;
                }
                if (rdev->me_fw != NULL) {
                        firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
                        rdev->me_fw = NULL;
                }
                if (rdev->rlc_fw != NULL) {
                        firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
                        rdev->rlc_fw = NULL;
                }
                if (rdev->mc_fw != NULL) {
                        firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
                        rdev->mc_fw = NULL;
                }
        }
        return err;
}
673
674 /**
675  * ni_fini_microcode - drop the firmwares image references
676  *
677  * @rdev: radeon_device pointer
678  *
679  * Drop the pfp, me, mc and rlc firmwares image references.
680  * Called at driver shutdown.
681  */
682 void ni_fini_microcode(struct radeon_device *rdev)
683 {
684
685         if (rdev->pfp_fw != NULL) {
686                 firmware_put(rdev->pfp_fw, FIRMWARE_UNLOAD);
687                 rdev->pfp_fw = NULL;
688         }
689
690         if (rdev->me_fw != NULL) {
691                 firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
692                 rdev->me_fw = NULL;
693         }
694
695         if (rdev->rlc_fw != NULL) {
696                 firmware_put(rdev->rlc_fw, FIRMWARE_UNLOAD);
697                 rdev->rlc_fw = NULL;
698         }
699
700         if (rdev->mc_fw != NULL) {
701                 firmware_put(rdev->mc_fw, FIRMWARE_UNLOAD);
702                 rdev->mc_fw = NULL;
703         }
704 }
705
706
707 /*
708  * Core functions
709  */
710 static void cayman_gpu_init(struct radeon_device *rdev)
711 {
712         u32 gb_addr_config = 0;
713         u32 mc_shared_chmap, mc_arb_ramcfg;
714         u32 cgts_tcc_disable;
715         u32 sx_debug_1;
716         u32 smx_dc_ctl0;
717         u32 cgts_sm_ctrl_reg;
718         u32 hdp_host_path_cntl;
719         u32 tmp;
720         u32 disabled_rb_mask;
721         int i, j;
722
723         switch (rdev->family) {
724         case CHIP_CAYMAN:
725                 rdev->config.cayman.max_shader_engines = 2;
726                 rdev->config.cayman.max_pipes_per_simd = 4;
727                 rdev->config.cayman.max_tile_pipes = 8;
728                 rdev->config.cayman.max_simds_per_se = 12;
729                 rdev->config.cayman.max_backends_per_se = 4;
730                 rdev->config.cayman.max_texture_channel_caches = 8;
731                 rdev->config.cayman.max_gprs = 256;
732                 rdev->config.cayman.max_threads = 256;
733                 rdev->config.cayman.max_gs_threads = 32;
734                 rdev->config.cayman.max_stack_entries = 512;
735                 rdev->config.cayman.sx_num_of_sets = 8;
736                 rdev->config.cayman.sx_max_export_size = 256;
737                 rdev->config.cayman.sx_max_export_pos_size = 64;
738                 rdev->config.cayman.sx_max_export_smx_size = 192;
739                 rdev->config.cayman.max_hw_contexts = 8;
740                 rdev->config.cayman.sq_num_cf_insts = 2;
741
742                 rdev->config.cayman.sc_prim_fifo_size = 0x100;
743                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
744                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
745                 gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
746                 break;
747         case CHIP_ARUBA:
748         default:
749                 rdev->config.cayman.max_shader_engines = 1;
750                 rdev->config.cayman.max_pipes_per_simd = 4;
751                 rdev->config.cayman.max_tile_pipes = 2;
752                 if ((rdev->ddev->pci_device == 0x9900) ||
753                     (rdev->ddev->pci_device == 0x9901) ||
754                     (rdev->ddev->pci_device == 0x9905) ||
755                     (rdev->ddev->pci_device == 0x9906) ||
756                     (rdev->ddev->pci_device == 0x9907) ||
757                     (rdev->ddev->pci_device == 0x9908) ||
758                     (rdev->ddev->pci_device == 0x9909) ||
759                     (rdev->ddev->pci_device == 0x990B) ||
760                     (rdev->ddev->pci_device == 0x990C) ||
761                     (rdev->ddev->pci_device == 0x990F) ||
762                     (rdev->ddev->pci_device == 0x9910) ||
763                     (rdev->ddev->pci_device == 0x9917) ||
764                     (rdev->ddev->pci_device == 0x9999) ||
765                     (rdev->ddev->pci_device == 0x999C)) {
766                         rdev->config.cayman.max_simds_per_se = 6;
767                         rdev->config.cayman.max_backends_per_se = 2;
768                 } else if ((rdev->ddev->pci_device == 0x9903) ||
769                            (rdev->ddev->pci_device == 0x9904) ||
770                            (rdev->ddev->pci_device == 0x990A) ||
771                            (rdev->ddev->pci_device == 0x990D) ||
772                            (rdev->ddev->pci_device == 0x990E) ||
773                            (rdev->ddev->pci_device == 0x9913) ||
774                            (rdev->ddev->pci_device == 0x9918) ||
775                            (rdev->ddev->pci_device == 0x999D)) {
776                         rdev->config.cayman.max_simds_per_se = 4;
777                         rdev->config.cayman.max_backends_per_se = 2;
778                 } else if ((rdev->ddev->pci_device == 0x9919) ||
779                            (rdev->ddev->pci_device == 0x9990) ||
780                            (rdev->ddev->pci_device == 0x9991) ||
781                            (rdev->ddev->pci_device == 0x9994) ||
782                            (rdev->ddev->pci_device == 0x9995) ||
783                            (rdev->ddev->pci_device == 0x9996) ||
784                            (rdev->ddev->pci_device == 0x999A) ||
785                            (rdev->ddev->pci_device == 0x99A0)) {
786                         rdev->config.cayman.max_simds_per_se = 3;
787                         rdev->config.cayman.max_backends_per_se = 1;
788                 } else {
789                         rdev->config.cayman.max_simds_per_se = 2;
790                         rdev->config.cayman.max_backends_per_se = 1;
791                 }
792                 rdev->config.cayman.max_texture_channel_caches = 2;
793                 rdev->config.cayman.max_gprs = 256;
794                 rdev->config.cayman.max_threads = 256;
795                 rdev->config.cayman.max_gs_threads = 32;
796                 rdev->config.cayman.max_stack_entries = 512;
797                 rdev->config.cayman.sx_num_of_sets = 8;
798                 rdev->config.cayman.sx_max_export_size = 256;
799                 rdev->config.cayman.sx_max_export_pos_size = 64;
800                 rdev->config.cayman.sx_max_export_smx_size = 192;
801                 rdev->config.cayman.max_hw_contexts = 8;
802                 rdev->config.cayman.sq_num_cf_insts = 2;
803
804                 rdev->config.cayman.sc_prim_fifo_size = 0x40;
805                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
806                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
807                 gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
808                 break;
809         }
810
811         /* Initialize HDP */
812         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
813                 WREG32((0x2c14 + j), 0x00000000);
814                 WREG32((0x2c18 + j), 0x00000000);
815                 WREG32((0x2c1c + j), 0x00000000);
816                 WREG32((0x2c20 + j), 0x00000000);
817                 WREG32((0x2c24 + j), 0x00000000);
818         }
819
820         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
821
822         evergreen_fix_pci_max_read_req_size(rdev);
823
824         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
825         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
826
827         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
828         rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
829         if (rdev->config.cayman.mem_row_size_in_kb > 4)
830                 rdev->config.cayman.mem_row_size_in_kb = 4;
831         /* XXX use MC settings? */
832         rdev->config.cayman.shader_engine_tile_size = 32;
833         rdev->config.cayman.num_gpus = 1;
834         rdev->config.cayman.multi_gpu_tile_size = 64;
835
836         tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
837         rdev->config.cayman.num_tile_pipes = (1 << tmp);
838         tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
839         rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
840         tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
841         rdev->config.cayman.num_shader_engines = tmp + 1;
842         tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
843         rdev->config.cayman.num_gpus = tmp + 1;
844         tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
845         rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
846         tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
847         rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
848
849
850         /* setup tiling info dword.  gb_addr_config is not adequate since it does
851          * not have bank info, so create a custom tiling dword.
852          * bits 3:0   num_pipes
853          * bits 7:4   num_banks
854          * bits 11:8  group_size
855          * bits 15:12 row_size
856          */
857         rdev->config.cayman.tile_config = 0;
858         switch (rdev->config.cayman.num_tile_pipes) {
859         case 1:
860         default:
861                 rdev->config.cayman.tile_config |= (0 << 0);
862                 break;
863         case 2:
864                 rdev->config.cayman.tile_config |= (1 << 0);
865                 break;
866         case 4:
867                 rdev->config.cayman.tile_config |= (2 << 0);
868                 break;
869         case 8:
870                 rdev->config.cayman.tile_config |= (3 << 0);
871                 break;
872         }
873
874         /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
875         if (rdev->flags & RADEON_IS_IGP)
876                 rdev->config.cayman.tile_config |= 1 << 4;
877         else {
878                 switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
879                 case 0: /* four banks */
880                         rdev->config.cayman.tile_config |= 0 << 4;
881                         break;
882                 case 1: /* eight banks */
883                         rdev->config.cayman.tile_config |= 1 << 4;
884                         break;
885                 case 2: /* sixteen banks */
886                 default:
887                         rdev->config.cayman.tile_config |= 2 << 4;
888                         break;
889                 }
890         }
891         rdev->config.cayman.tile_config |=
892                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
893         rdev->config.cayman.tile_config |=
894                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
895
896         tmp = 0;
897         for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
898                 u32 rb_disable_bitmap;
899
900                 WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
901                 WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
902                 rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
903                 tmp <<= 4;
904                 tmp |= rb_disable_bitmap;
905         }
906         /* enabled rb are just the one not disabled :) */
907         disabled_rb_mask = tmp;
908         tmp = 0;
909         for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
910                 tmp |= (1 << i);
911         /* if all the backends are disabled, fix it up here */
912         if ((disabled_rb_mask & tmp) == tmp) {
913                 for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
914                         disabled_rb_mask &= ~(1 << i);
915         }
916
917         WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
918         WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
919
920         WREG32(GB_ADDR_CONFIG, gb_addr_config);
921         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
922         if (ASIC_IS_DCE6(rdev))
923                 WREG32(DMIF_ADDR_CALC, gb_addr_config);
924         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
925         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
926         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
927         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
928         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
929         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
930
931         if ((rdev->config.cayman.max_backends_per_se == 1) &&
932             (rdev->flags & RADEON_IS_IGP)) {
933                 if ((disabled_rb_mask & 3) == 1) {
934                         /* RB0 disabled, RB1 enabled */
935                         tmp = 0x11111111;
936                 } else {
937                         /* RB1 disabled, RB0 enabled */
938                         tmp = 0x00000000;
939                 }
940         } else {
941                 tmp = gb_addr_config & NUM_PIPES_MASK;
942                 tmp = r6xx_remap_render_backend(rdev, tmp,
943                                                 rdev->config.cayman.max_backends_per_se *
944                                                 rdev->config.cayman.max_shader_engines,
945                                                 CAYMAN_MAX_BACKENDS, disabled_rb_mask);
946         }
947         WREG32(GB_BACKEND_MAP, tmp);
948
949         cgts_tcc_disable = 0xffff0000;
950         for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
951                 cgts_tcc_disable &= ~(1 << (16 + i));
952         WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
953         WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
954         WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
955         WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
956
957         /* reprogram the shader complex */
958         cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
959         for (i = 0; i < 16; i++)
960                 WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
961         WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
962
963         /* set HW defaults for 3D engine */
964         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
965
966         sx_debug_1 = RREG32(SX_DEBUG_1);
967         sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
968         WREG32(SX_DEBUG_1, sx_debug_1);
969
970         smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
971         smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
972         smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
973         WREG32(SMX_DC_CTL0, smx_dc_ctl0);
974
975         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
976
977         /* need to be explicitly zero-ed */
978         WREG32(VGT_OFFCHIP_LDS_BASE, 0);
979         WREG32(SQ_LSTMP_RING_BASE, 0);
980         WREG32(SQ_HSTMP_RING_BASE, 0);
981         WREG32(SQ_ESTMP_RING_BASE, 0);
982         WREG32(SQ_GSTMP_RING_BASE, 0);
983         WREG32(SQ_VSTMP_RING_BASE, 0);
984         WREG32(SQ_PSTMP_RING_BASE, 0);
985
986         WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
987
988         WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
989                                         POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
990                                         SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
991
992         WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
993                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
994                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
995
996
997         WREG32(VGT_NUM_INSTANCES, 1);
998
999         WREG32(CP_PERFMON_CNTL, 0);
1000
1001         WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1002                                   FETCH_FIFO_HIWATER(0x4) |
1003                                   DONE_FIFO_HIWATER(0xe0) |
1004                                   ALU_UPDATE_FIFO_HIWATER(0x8)));
1005
1006         WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1007         WREG32(SQ_CONFIG, (VC_ENABLE |
1008                            EXPORT_SRC_C |
1009                            GFX_PRIO(0) |
1010                            CS1_PRIO(0) |
1011                            CS2_PRIO(1)));
1012         WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1013
1014         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1015                                           FORCE_EOV_MAX_REZ_CNT(255)));
1016
1017         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1018                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1019
1020         WREG32(VGT_GS_VERTEX_REUSE, 16);
1021         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1022
1023         WREG32(CB_PERF_CTR0_SEL_0, 0);
1024         WREG32(CB_PERF_CTR0_SEL_1, 0);
1025         WREG32(CB_PERF_CTR1_SEL_0, 0);
1026         WREG32(CB_PERF_CTR1_SEL_1, 0);
1027         WREG32(CB_PERF_CTR2_SEL_0, 0);
1028         WREG32(CB_PERF_CTR2_SEL_1, 0);
1029         WREG32(CB_PERF_CTR3_SEL_0, 0);
1030         WREG32(CB_PERF_CTR3_SEL_1, 0);
1031
1032         tmp = RREG32(HDP_MISC_CNTL);
1033         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1034         WREG32(HDP_MISC_CNTL, tmp);
1035
1036         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1037         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1038
1039         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1040
1041         DRM_UDELAY(50);
1042 }
1043
1044 /*
1045  * GART
1046  */
/**
 * cayman_pcie_gart_tlb_flush - flush the GART TLB (VM context 0)
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache first so pending CPU writes to the page table
 * reach memory, then requests a TLB invalidate for context 0 only
 * (bit 0 of VM_INVALIDATE_REQUEST); contexts 1-7 are left alone.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1055
/**
 * cayman_pcie_gart_enable - set up and enable the PCIE GART (VM context 0)
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, points VM context 0 at the GTT aperture, and gives the
 * currently-unused VM contexts 1-7 a valid page table (the GART table)
 * so they never walk garbage before userspace VMs are bound.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* repopulate the page table entries from the saved copy */
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	/* NOTE(review): the bare (0xA << 7) field below has no symbolic name
	 * in nid.h (carried over as-is from upstream) -- presumably a default
	 * memory-type encoding; verify against the register spec before
	 * changing. */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: covers the GTT aperture, faults fall back to the
	 * dummy page so a bad access never scribbles on random memory */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* NOTE(review): 0x15D4-0x15DC are written raw upstream as well; the
	 * register names are not in nid.h -- confirm before touching. */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		/* context N registers sit at context0's address + N dwords */
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* two-level page tables plus interrupt + dummy-page redirect on
	 * every class of protection fault */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1136
/**
 * cayman_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Turns off all VM contexts, drops the L1 TLB / L2 cache enables while
 * keeping the pass-through behavior for unmapped accesses, and unpins
 * the page table from VRAM. Mirror image of cayman_pcie_gart_enable().
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1156
/**
 * cayman_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART in hardware, then frees the page table BO and the
 * GART bookkeeping. Order matters: hardware must stop referencing the
 * table before the backing memory is released.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1163
/**
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: CP ring index (0-2); only the low two bits are used
 * @cp_int_cntl: interrupt control value to write
 *
 * CP_INT_CNTL is banked per ring: the low two bits of SRBM_GFX_CNTL
 * select which ring's instance is addressed. Select the ring first,
 * then write the interrupt control word.
 */
void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
			      int ring, u32 cp_int_cntl)
{
	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
	WREG32(CP_INT_CNTL, cp_int_cntl);
}
1172
1173 /*
1174  * CP.
1175  */
/**
 * cayman_fence_ring_emit - emit a fence on a CP ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit; fence->ring selects the ring and fence slot
 *
 * Emits a cache flush followed by an EVENT_WRITE_EOP that writes
 * fence->seq to the fence driver's GPU address and raises an interrupt,
 * so the host can observe command-stream completion.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	/* CP_COHER_CNTL2 := 0 selects vmid 0 for the surface sync below */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write the 32-bit seq value; INT_SEL(2): irq on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1199
/**
 * cayman_ring_ib_execute - schedule an indirect buffer on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB to schedule; ib->ring selects the ring, ib->vm (if any)
 *      supplies the VM id tagged onto the IB and the cache flush
 *
 * Forces DX10/11 mode, optionally records the expected post-IB read
 * pointer into rptr_save_reg (for lockup detection), emits the
 * INDIRECT_BUFFER packet, then flushes the read caches for the IB's vmid.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 dwords for this write + 4 for the IB packet + 8 for the
		 * flush sequence below = where rptr lands after the IB */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg - 
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	/* vm id rides in bits 31:24 of the length dword */
	radeon_ring_write(ring, ib->length_dw | 
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
}
1236
1237 void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
1238                                struct radeon_ring *ring,
1239                                struct radeon_semaphore *semaphore,
1240                                bool emit_wait)
1241 {
1242         uint64_t addr = semaphore->gpu_addr;
1243
1244         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
1245         radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
1246
1247         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
1248         radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
1249
1250         radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
1251         radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
1252 }
1253
1254 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1255 {
1256         if (enable)
1257                 WREG32(CP_ME_CNTL, 0);
1258         else {
1259                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1260                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1261                 WREG32(SCRATCH_UMSK, 0);
1262                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1263         }
1264 }
1265
1266 static int cayman_cp_load_microcode(struct radeon_device *rdev)
1267 {
1268         const __be32 *fw_data;
1269         int i;
1270
1271         if (!rdev->me_fw || !rdev->pfp_fw)
1272                 return -EINVAL;
1273
1274         cayman_cp_enable(rdev, false);
1275
1276         fw_data = (const __be32 *)rdev->pfp_fw->data;
1277         WREG32(CP_PFP_UCODE_ADDR, 0);
1278         for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
1279                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1280         WREG32(CP_PFP_UCODE_ADDR, 0);
1281
1282         fw_data = (const __be32 *)rdev->me_fw->data;
1283         WREG32(CP_ME_RAM_WADDR, 0);
1284         for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
1285                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1286
1287         WREG32(CP_PFP_UCODE_ADDR, 0);
1288         WREG32(CP_ME_RAM_WADDR, 0);
1289         WREG32(CP_ME_RAM_RADDR, 0);
1290         return 0;
1291 }
1292
/**
 * cayman_cp_start - initialize the CP and emit the default GPU state
 *
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE on the gfx ring, enables the CP, then streams the
 * golden register state (cayman_default_state) wrapped in clear-state
 * preamble markers, plus a handful of fixed register writes.
 * Returns 0 on success, negative error code if a ring lock fails.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* 7 dwords: ME_INITIALIZE header + 6 payload dwords below */
	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* golden state + 19 dwords of fixed packets emitted below */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	/* SET_CONTEXT_REG at offset 0x316, two consecutive registers */
	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
1358
/**
 * cayman_cp_fini - tear down the gfx command processor
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP first so nothing is fetched from the ring while it is
 * freed, then releases the ring buffer and the rptr-save scratch reg.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1366
/**
 * cayman_cp_resume - bring up all three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP (and the graphics blocks that must be reset with
 * it), programs ring buffer sizes, writeback rptr addresses, and base
 * addresses for rings 0-2, zeroes the read/write pointers, then starts
 * the CP via cayman_cp_start() and ring-tests cp0.
 * Returns 0 on success, negative error code if the cp0 ring test fails.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register instances, indexed in lock-step with ridx[] */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	DRM_MDELAY(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* NOTE(review): bit 27 of CP_DEBUG is set bare upstream too; its
	 * meaning is not named in nid.h -- confirm before changing. */
	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size (log2 of qwords) */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = drm_order(ring->ring_size / 8);
		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		/* RB_RPTR_WR_ENA lets the host force the read pointer */
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		DRM_MDELAY(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only test cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1475
1476 /*
1477  * DMA
1478  * Starting with R600, the GPU has an asynchronous
1479  * DMA engine.  The programming model is very similar
1480  * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
1483  * It supports copying data, writing embedded data,
1484  * solid fills, and a number of other things.  It also
1485  * has support for tiling/detiling of buffers.
1486  * Cayman and newer support two asynchronous DMA engines.
1487  */
/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
                                struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        if (rdev->wb.enabled) {
                /* Compute where the rptr will be after the IB packet below
                 * is consumed: skip the 4-DW write packet emitted here,
                 * round up to the (wptr & 7) == 5 alignment the NOP padding
                 * below produces, then add the 3-DW IB packet itself. */
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                /* write that predicted rptr value to the writeback slot */
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        /* VM id 0 means the IB runs in the system (non-VM) context */
        radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}
1522
1523 /**
1524  * cayman_dma_stop - stop the async dma engines
1525  *
1526  * @rdev: radeon_device pointer
1527  *
1528  * Stop the async dma engines (cayman-SI).
1529  */
1530 void cayman_dma_stop(struct radeon_device *rdev)
1531 {
1532         u32 rb_cntl;
1533
1534         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1535
1536         /* dma0 */
1537         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1538         rb_cntl &= ~DMA_RB_ENABLE;
1539         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1540
1541         /* dma1 */
1542         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1543         rb_cntl &= ~DMA_RB_ENABLE;
1544         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1545
1546         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1547         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1548 }
1549
/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them. (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        u32 reg_offset, wb_offset;
        int i, r;

        /* Reset dma */
        WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
        RREG32(SRBM_SOFT_RESET);        /* read back to post the write */
        DRM_UDELAY(50);
        WREG32(SRBM_SOFT_RESET, 0);

        /* program both engines: i == 0 -> dma0, i == 1 -> dma1 */
        for (i = 0; i < 2; i++) {
                if (i == 0) {
                        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
                        reg_offset = DMA0_REGISTER_OFFSET;
                        wb_offset = R600_WB_DMA_RPTR_OFFSET;
                } else {
                        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
                        reg_offset = DMA1_REGISTER_OFFSET;
                        wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
                }

                /* clear the semaphore timer control registers */
                WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
                WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = drm_order(ring->ring_size / 4);
                rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
                rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(DMA_RB_RPTR + reg_offset, 0);
                WREG32(DMA_RB_WPTR + reg_offset, 0);

                /* set the wb address whether it's enabled or not */
                WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
                       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
                WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
                       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

                if (rdev->wb.enabled)
                        rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

                WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

                /* enable DMA IBs */
                ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
                ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
                WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

                /* mask the context-empty interrupt */
                dma_cntl = RREG32(DMA_CNTL + reg_offset);
                dma_cntl &= ~CTXEMPTY_INT_ENABLE;
                WREG32(DMA_CNTL + reg_offset, dma_cntl);

                ring->wptr = 0;
                WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

                ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

                /* turn the ring on; rb_cntl may have gained the
                 * writeback-enable bit above */
                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

                ring->ready = true;

                r = radeon_ring_test(rdev, ring->idx, ring);
                if (r) {
                        ring->ready = false;
                        return r;
                }
        }

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
1640
1641 /**
1642  * cayman_dma_fini - tear down the async dma engines
1643  *
1644  * @rdev: radeon_device pointer
1645  *
1646  * Stop the async dma engines and free the rings (cayman-SI).
1647  */
1648 void cayman_dma_fini(struct radeon_device *rdev)
1649 {
1650         cayman_dma_stop(rdev);
1651         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1652         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1653 }
1654
/**
 * cayman_gpu_check_soft_reset - decode busy status into a reset mask
 *
 * @rdev: radeon_device pointer
 *
 * Read the GRBM, DMA, SRBM and VM_L2 status registers and translate
 * any busy/pending bits into a RADEON_RESET_* mask describing which
 * blocks would need a soft reset.  MC busy is deliberately dropped
 * from the result (see below).  Returns 0 when everything is idle.
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        if (tmp & (PA_BUSY | SC_BUSY |
                   SH_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
                   CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        if (tmp & GRBM_EE_BUSY)
                reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

        /* DMA_STATUS_REG 0 */
        tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
        if (!(tmp & DMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* DMA_STATUS_REG 1 */
        tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
        if (!(tmp & DMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & DMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & DMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);
        if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
                reset_mask |= RADEON_RESET_RLC;

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* VM_L2_STATUS */
        tmp = RREG32(VM_L2_STATUS);
        if (tmp & L2_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
1732
/**
 * cayman_gpu_soft_reset - soft-reset the blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bits selecting which blocks to reset
 *
 * Dump fault state, halt the CP and any DMA engine being reset, stop
 * the MC, pulse the per-block GRBM/SRBM soft-reset bits, and resume
 * the MC.  A zero mask is a no-op.  The register sequence is order
 * sensitive; do not reorder.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
        struct evergreen_mc_save save;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if (reset_mask == 0)
                return;

        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

        /* log the VM protection-fault registers for post-mortem */
        evergreen_print_gpu_status_regs(rdev);
        dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
                 RREG32(0x14F8));
        dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(0x14D8));
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                 RREG32(0x14FC));
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(0x14DC));

        /* Disable CP parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

        if (reset_mask & RADEON_RESET_DMA) {
                /* dma0 */
                tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
                tmp &= ~DMA_RB_ENABLE;
                WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
        }

        if (reset_mask & RADEON_RESET_DMA1) {
                /* dma1 */
                tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
                tmp &= ~DMA_RB_ENABLE;
                WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
        }

        DRM_UDELAY(50);

        /* the MC must be quiesced before the reset is pulsed */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }

        /* translate the RADEON_RESET_* mask into GRBM reset bits ... */
        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
                grbm_soft_reset = SOFT_RESET_CB |
                        SOFT_RESET_DB |
                        SOFT_RESET_GDS |
                        SOFT_RESET_PA |
                        SOFT_RESET_SC |
                        SOFT_RESET_SPI |
                        SOFT_RESET_SH |
                        SOFT_RESET_SX |
                        SOFT_RESET_TC |
                        SOFT_RESET_TA |
                        SOFT_RESET_VGT |
                        SOFT_RESET_IA;
        }

        if (reset_mask & RADEON_RESET_CP) {
                grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

                srbm_soft_reset |= SOFT_RESET_GRBM;
        }

        /* ... and into SRBM reset bits */
        if (reset_mask & RADEON_RESET_DMA)
                srbm_soft_reset |= SOFT_RESET_DMA;

        if (reset_mask & RADEON_RESET_DMA1)
                srbm_soft_reset |= SOFT_RESET_DMA1;

        if (reset_mask & RADEON_RESET_DISPLAY)
                srbm_soft_reset |= SOFT_RESET_DC;

        if (reset_mask & RADEON_RESET_RLC)
                srbm_soft_reset |= SOFT_RESET_RLC;

        if (reset_mask & RADEON_RESET_SEM)
                srbm_soft_reset |= SOFT_RESET_SEM;

        if (reset_mask & RADEON_RESET_IH)
                srbm_soft_reset |= SOFT_RESET_IH;

        if (reset_mask & RADEON_RESET_GRBM)
                srbm_soft_reset |= SOFT_RESET_GRBM;

        if (reset_mask & RADEON_RESET_VMC)
                srbm_soft_reset |= SOFT_RESET_VMC;

        if (!(rdev->flags & RADEON_IS_IGP)) {
                if (reset_mask & RADEON_RESET_MC)
                        srbm_soft_reset |= SOFT_RESET_MC;
        }

        /* pulse the GRBM reset bits: set, read back to post, hold 50us,
         * then clear and read back again */
        if (grbm_soft_reset) {
                tmp = RREG32(GRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);

                DRM_UDELAY(50);

                tmp &= ~grbm_soft_reset;
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);
        }

        /* same pulse sequence for the SRBM reset bits */
        if (srbm_soft_reset) {
                tmp = RREG32(SRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);

                DRM_UDELAY(50);

                tmp &= ~srbm_soft_reset;
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);
        }

        /* Wait a little for things to settle down */
        DRM_UDELAY(50);

        evergreen_mc_resume(rdev, &save);
        DRM_UDELAY(50);

        evergreen_print_gpu_status_regs(rdev);
}
1864
1865 int cayman_asic_reset(struct radeon_device *rdev)
1866 {
1867         u32 reset_mask;
1868
1869         reset_mask = cayman_gpu_check_soft_reset(rdev);
1870
1871         if (reset_mask)
1872                 r600_set_bios_scratch_engine_hung(rdev, true);
1873
1874         cayman_gpu_soft_reset(rdev, reset_mask);
1875
1876         reset_mask = cayman_gpu_check_soft_reset(rdev);
1877
1878         if (!reset_mask)
1879                 r600_set_bios_scratch_engine_hung(rdev, false);
1880
1881         return 0;
1882 }
1883
1884 /**
1885  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1886  *
1887  * @rdev: radeon_device pointer
1888  * @ring: radeon_ring structure holding ring information
1889  *
1890  * Check if the GFX engine is locked up.
1891  * Returns true if the engine appears to be locked up, false if not.
1892  */
1893 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1894 {
1895         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1896
1897         if (!(reset_mask & (RADEON_RESET_GFX |
1898                             RADEON_RESET_COMPUTE |
1899                             RADEON_RESET_CP))) {
1900                 radeon_ring_lockup_update(ring);
1901                 return false;
1902         }
1903         /* force CP activities */
1904         radeon_ring_force_activity(rdev, ring);
1905         return radeon_ring_test_lockup(rdev, ring);
1906 }
1907
1908 /**
1909  * cayman_dma_is_lockup - Check if the DMA engine is locked up
1910  *
1911  * @rdev: radeon_device pointer
1912  * @ring: radeon_ring structure holding ring information
1913  *
1914  * Check if the async DMA engine is locked up.
1915  * Returns true if the engine appears to be locked up, false if not.
1916  */
1917 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1918 {
1919         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1920         u32 mask;
1921
1922         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1923                 mask = RADEON_RESET_DMA;
1924         else
1925                 mask = RADEON_RESET_DMA1;
1926
1927         if (!(reset_mask & mask)) {
1928                 radeon_ring_lockup_update(ring);
1929                 return false;
1930         }
1931         /* force ring activities */
1932         radeon_ring_force_activity(rdev, ring);
1933         return radeon_ring_test_lockup(rdev, ring);
1934 }
1935
/**
 * cayman_startup - bring the ASIC up to a fully working state
 *
 * @rdev: radeon_device pointer
 *
 * Load firmware, program the MC and GART, initialize the GPU, set up
 * writeback, fences, interrupts and all rings (GFX, both compute CPs,
 * both DMA engines, optionally UVD), then start the IB pool, the VM
 * manager and audio.  Used by both init and resume paths.
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r;

        /* enable pcie gen2 link */
        evergreen_pcie_gen2_enable(rdev);

        /* IGPs (TN) have no MC firmware; discrete parts need mc_fw too */
        if (rdev->flags & RADEON_IS_IGP) {
                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
                        r = ni_init_microcode(rdev);
                        if (r) {
                                DRM_ERROR("Failed to load firmware!\n");
                                return r;
                        }
                }
        } else {
                if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
                        r = ni_init_microcode(rdev);
                        if (r) {
                                DRM_ERROR("Failed to load firmware!\n");
                                return r;
                        }
                }

                r = ni_mc_load_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load MC firmware!\n");
                        return r;
                }
        }

        r = r600_vram_scratch_init(rdev);
        if (r)
                return r;

        evergreen_mc_program(rdev);
        r = cayman_pcie_gart_enable(rdev);
        if (r)
                return r;
        cayman_gpu_init(rdev);

        /* blitter failure is non-fatal: copies fall back to memcpy */
        r = evergreen_blit_init(rdev);
        if (r) {
                r600_blit_fini(rdev);
                rdev->asic->copy.copy = NULL;
                dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
        }

        /* allocate rlc buffers */
        if (rdev->flags & RADEON_IS_IGP) {
                r = si_rlc_init(rdev);
                if (r) {
                        DRM_ERROR("Failed to init rlc BOs!\n");
                        return r;
                }
        }

        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
        if (r)
                return r;

        r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        /* UVD is optional: on any failure just disable its ring */
        r = rv770_uvd_resume(rdev);
        if (!r) {
                r = radeon_fence_driver_start_ring(rdev,
                                                   R600_RING_TYPE_UVD_INDEX);
                if (r)
                        dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
        }
        if (r)
                rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
        if (r) {
                dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
                return r;
        }

        /* Enable IRQ */
        if (!rdev->irq.installed) {
                r = radeon_irq_kms_init(rdev);
                if (r)
                        return r;
        }

        r = r600_irq_init(rdev);
        if (r) {
                DRM_ERROR("radeon: IH init failed (%d).\n", r);
                radeon_irq_kms_fini(rdev);
                return r;
        }
        evergreen_irq_set(rdev);

        /* GFX ring */
        r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
                             CP_RB0_RPTR, CP_RB0_WPTR,
                             0, 0xfffff, RADEON_CP_PACKET2);
        if (r)
                return r;

        /* DMA rings: rptr/wptr are dword-shifted by 2, NOP pads */
        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
                             2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        if (r)
                return r;

        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
                             DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
                             DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
                             2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        if (r)
                return r;

        r = cayman_cp_load_microcode(rdev);
        if (r)
                return r;
        r = cayman_cp_resume(rdev);
        if (r)
                return r;

        r = cayman_dma_resume(rdev);
        if (r)
                return r;

        /* UVD ring is only set up if rv770_uvd_resume() succeeded above */
        ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
        if (ring->ring_size) {
                r = radeon_ring_init(rdev, ring, ring->ring_size,
                                     R600_WB_UVD_RPTR_OFFSET,
                                     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
                                     0, 0xfffff, RADEON_CP_PACKET2);
                if (!r)
                        r = r600_uvd_init(rdev);
                if (r)
                        DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
        }

        r = radeon_ib_pool_init(rdev);
        if (r) {
                dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
                return r;
        }

        r = radeon_vm_manager_init(rdev);
        if (r) {
                dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
                return r;
        }

        r = r600_audio_init(rdev);
        if (r)
                return r;

        return 0;
}
2117
2118 int cayman_resume(struct radeon_device *rdev)
2119 {
2120         int r;
2121
2122         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2123          * posting will perform necessary task to bring back GPU into good
2124          * shape.
2125          */
2126         /* post card */
2127         atom_asic_init(rdev->mode_info.atom_context);
2128
2129         /* init golden registers */
2130         ni_init_golden_registers(rdev);
2131
2132         rdev->accel_working = true;
2133         r = cayman_startup(rdev);
2134         if (r) {
2135                 DRM_ERROR("cayman startup failed on resume\n");
2136                 rdev->accel_working = false;
2137                 return r;
2138         }
2139         return r;
2140 }
2141
/**
 * cayman_suspend - power-management suspend entry point
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce audio, the VM manager, the CP, DMA and UVD engines, then
 * disable interrupts, writeback and the GART.  The order mirrors the
 * reverse of cayman_startup().  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
        r600_audio_fini(rdev);
        radeon_vm_manager_fini(rdev);
        cayman_cp_enable(rdev, false);
        cayman_dma_stop(rdev);
        r600_uvd_rbc_stop(rdev);
        radeon_uvd_suspend(rdev);
        evergreen_irq_suspend(rdev);
        radeon_wb_disable(rdev);
        cayman_pcie_gart_disable(rdev);
        return 0;
}
2155
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call ASIC-specific functions. This should
 * also allow removal of a bunch of callback functions like
 * vram_info.
 */
/**
 * cayman_init - one-time driver-load initialization
 *
 * @rdev: radeon_device pointer
 *
 * Read and post the BIOS, initialize clocks, fences, MC, memory
 * manager, all ring objects, the IH ring and the GART, then run the
 * common startup path.  A startup failure disables acceleration but
 * is not fatal here.  Returns 0 on success, negative error code on
 * failure.
 */
int cayman_init(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r;

        /* Read BIOS */
        if (!radeon_get_bios(rdev)) {
                if (ASIC_IS_AVIVO(rdev))
                        return -EINVAL;
        }
        /* Must be an ATOMBIOS */
        if (!rdev->is_atom_bios) {
                dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
                return -EINVAL;
        }
        r = radeon_atombios_init(rdev);
        if (r)
                return r;

        /* Post card if necessary */
        if (!radeon_card_posted(rdev)) {
                if (!rdev->bios) {
                        dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
                        return -EINVAL;
                }
                DRM_INFO("GPU not posted. posting now...\n");
                atom_asic_init(rdev->mode_info.atom_context);
        }
        /* init golden registers */
        ni_init_golden_registers(rdev);
        /* Initialize scratch registers */
        r600_scratch_init(rdev);
        /* Initialize surface registers */
        radeon_surface_init(rdev);
        /* Initialize clocks */
        radeon_get_clock_info(rdev->ddev);
        /* Fence driver */
        r = radeon_fence_driver_init(rdev);
        if (r)
                return r;
        /* initialize memory controller */
        r = evergreen_mc_init(rdev);
        if (r)
                return r;
        /* Memory manager */
        r = radeon_bo_init(rdev);
        if (r)
                return r;

        /* GFX ring: 1MB */
        ring->ring_obj = NULL;
        r600_ring_init(rdev, ring, 1024 * 1024);

        /* the two async DMA rings: 64KB each */
        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        ring->ring_obj = NULL;
        r600_ring_init(rdev, ring, 64 * 1024);

        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
        ring->ring_obj = NULL;
        r600_ring_init(rdev, ring, 64 * 1024);

        /* UVD ring only if the UVD block initializes */
        r = radeon_uvd_init(rdev);
        if (!r) {
                ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
                ring->ring_obj = NULL;
                r600_ring_init(rdev, ring, 4096);
        }

        rdev->ih.ring_obj = NULL;
        r600_ih_ring_init(rdev, 64 * 1024);

        r = r600_pcie_gart_init(rdev);
        if (r)
                return r;

        rdev->accel_working = true;
        r = cayman_startup(rdev);
        if (r) {
                /* tear everything back down and continue without accel */
                dev_err(rdev->dev, "disabling GPU acceleration\n");
                cayman_cp_fini(rdev);
                cayman_dma_fini(rdev);
                r600_irq_fini(rdev);
                if (rdev->flags & RADEON_IS_IGP)
                        si_rlc_fini(rdev);
                radeon_wb_fini(rdev);
                radeon_ib_pool_fini(rdev);
                radeon_vm_manager_fini(rdev);
                radeon_irq_kms_fini(rdev);
                cayman_pcie_gart_fini(rdev);
                rdev->accel_working = false;
        }

        /* Don't start up if the MC ucode is missing.
         * The default clocks and voltages before the MC ucode
         * is loaded are not sufficient for advanced operations.
         *
         * We can skip this check for TN, because there is no MC
         * ucode.
         */
        if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
                DRM_ERROR("radeon: MC ucode required for NI+.\n");
                return -EINVAL;
        }

        return 0;
}
2267
/**
 * cayman_fini - driver-unload teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tear down everything cayman_init()/cayman_startup() set up, in
 * roughly reverse order, and release the cached BIOS image.
 */
void cayman_fini(struct radeon_device *rdev)
{
        r600_blit_fini(rdev);
        cayman_cp_fini(rdev);
        cayman_dma_fini(rdev);
        r600_irq_fini(rdev);
        /* rlc BOs only exist on IGPs (see cayman_startup) */
        if (rdev->flags & RADEON_IS_IGP)
                si_rlc_fini(rdev);
        radeon_wb_fini(rdev);
        radeon_vm_manager_fini(rdev);
        radeon_ib_pool_fini(rdev);
        radeon_irq_kms_fini(rdev);
        radeon_uvd_fini(rdev);
        cayman_pcie_gart_fini(rdev);
        r600_vram_scratch_fini(rdev);
        radeon_gem_fini(rdev);
        radeon_fence_driver_fini(rdev);
        radeon_bo_fini(rdev);
        radeon_atombios_fini(rdev);
        drm_free(rdev->bios, M_DRM);
        rdev->bios = NULL;
}
2291
2292 /*
2293  * vm
2294  */
2295 int cayman_vm_init(struct radeon_device *rdev)
2296 {
2297         /* number of VMs */
2298         rdev->vm_manager.nvm = 8;
2299         /* base offset of vram pages */
2300         if (rdev->flags & RADEON_IS_IGP) {
2301                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2302                 tmp <<= 22;
2303                 rdev->vm_manager.vram_base_offset = tmp;
2304         } else
2305                 rdev->vm_manager.vram_base_offset = 0;
2306         return 0;
2307 }
2308
/**
 * cayman_vm_fini - VM manager teardown hook
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman_vm_init() allocates nothing, so there
 * is nothing to release here.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2312
/* R6xx-style page-table entry bits, used when translating the generic
 * RADEON_VM_PAGE_* flags into hardware PTE flags below */
#define R600_ENTRY_VALID   (1 << 0)
#define R600_PTE_SYSTEM    (1 << 1)
#define R600_PTE_SNOOPED   (1 << 2)
#define R600_PTE_READABLE  (1 << 5)
#define R600_PTE_WRITEABLE (1 << 6)
2318
2319 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2320 {
2321         uint32_t r600_flags = 0;
2322         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2323         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2324         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2325         if (flags & RADEON_VM_PAGE_SYSTEM) {
2326                 r600_flags |= R600_PTE_SYSTEM;
2327                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2328         }
2329         return r600_flags;
2330 }
2331
2332 /**
2333  * cayman_vm_set_page - update the page tables using the CP
2334  *
2335  * @rdev: radeon_device pointer
2336  * @ib: indirect buffer to fill with commands
2337  * @pe: addr of the page entry
2338  * @addr: dst addr to write into pe
2339  * @count: number of page entries to update
2340  * @incr: increase next addr by incr bytes
2341  * @flags: access flags
2342  *
2343  * Update the page tables using the CP (cayman/TN).
2344  */
2345 void cayman_vm_set_page(struct radeon_device *rdev,
2346                         struct radeon_ib *ib,
2347                         uint64_t pe,
2348                         uint64_t addr, unsigned count,
2349                         uint32_t incr, uint32_t flags)
2350 {
2351         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2352         uint64_t value;
2353         unsigned ndw;
2354
2355         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2356                 while (count) {
2357                         ndw = 1 + count * 2;
2358                         if (ndw > 0x3FFF)
2359                                 ndw = 0x3FFF;
2360
2361                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
2362                         ib->ptr[ib->length_dw++] = pe;
2363                         ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2364                         for (; ndw > 1; ndw -= 2, --count, pe += 8) {
2365                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
2366                                         value = radeon_vm_map_gart(rdev, addr);
2367                                         value &= 0xFFFFFFFFFFFFF000ULL;
2368                                 } else if (flags & RADEON_VM_PAGE_VALID) {
2369                                         value = addr;
2370                                 } else {
2371                                         value = 0;
2372                                 }
2373                                 addr += incr;
2374                                 value |= r600_flags;
2375                                 ib->ptr[ib->length_dw++] = value;
2376                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2377                         }
2378                 }
2379         } else {
2380                 if ((flags & RADEON_VM_PAGE_SYSTEM) ||
2381                     (count == 1)) {
2382                         while (count) {
2383                                 ndw = count * 2;
2384                                 if (ndw > 0xFFFFE)
2385                                         ndw = 0xFFFFE;
2386
2387                                 /* for non-physically contiguous pages (system) */
2388                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
2389                                 ib->ptr[ib->length_dw++] = pe;
2390                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2391                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2392                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
2393                                                 value = radeon_vm_map_gart(rdev, addr);
2394                                                 value &= 0xFFFFFFFFFFFFF000ULL;
2395                                         } else if (flags & RADEON_VM_PAGE_VALID) {
2396                                                 value = addr;
2397                                         } else {
2398                                                 value = 0;
2399                                         }
2400                                         addr += incr;
2401                                         value |= r600_flags;
2402                                         ib->ptr[ib->length_dw++] = value;
2403                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
2404                                 }
2405                         }
2406                         while (ib->length_dw & 0x7)
2407                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2408                 } else {
2409                         while (count) {
2410                                 ndw = count * 2;
2411                                 if (ndw > 0xFFFFE)
2412                                         ndw = 0xFFFFE;
2413
2414                                 if (flags & RADEON_VM_PAGE_VALID)
2415                                         value = addr;
2416                                 else
2417                                         value = 0;
2418                                 /* for physically contiguous pages (vram) */
2419                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
2420                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
2421                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2422                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
2423                                 ib->ptr[ib->length_dw++] = 0;
2424                                 ib->ptr[ib->length_dw++] = value; /* value */
2425                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2426                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
2427                                 ib->ptr[ib->length_dw++] = 0;
2428                                 pe += ndw * 4;
2429                                 addr += (ndw / 2) * incr;
2430                                 count -= ndw / 2;
2431                         }
2432                 }
2433                 while (ib->length_dw & 0x7)
2434                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2435         }
2436 }
2437
2438 /**
2439  * cayman_vm_flush - vm flush using the CP
2440  *
2441  * @rdev: radeon_device pointer
2442  *
2443  * Update the page table base and flush the VM TLB
2444  * using the CP (cayman-si).
2445  */
2446 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2447 {
2448         struct radeon_ring *ring = &rdev->ring[ridx];
2449
2450         if (vm == NULL)
2451                 return;
2452
2453         radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
2454         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2455
2456         /* flush hdp cache */
2457         radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
2458         radeon_ring_write(ring, 0x1);
2459
2460         /* bits 0-7 are the VM contexts0-7 */
2461         radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
2462         radeon_ring_write(ring, 1 << vm->id);
2463
2464         /* sync PFP to ME, otherwise we might get invalid PFP reads */
2465         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2466         radeon_ring_write(ring, 0x0);
2467 }
2468
/**
 * cayman_dma_vm_flush - vm flush using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: vm to flush; a NULL vm is a no-op
 *
 * Same sequence as cayman_vm_flush(), but the registers are written
 * through DMA_PACKET_SRBM_WRITE packets on a DMA ring: set the page
 * table base for this VM context, flush the HDP cache, then
 * invalidate the context's TLB entries.
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        /* SRBM writes take a byte-enable mask (0xf) and a dword-aligned
         * register offset (hence the >> 2) */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm->id);
}
2490