2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/module.h>
28 #include "radeon_asic.h"
31 #include "cik_blit_shaders.h"
/*
 * Firmware image sizes, in 32-bit words (cik_init_microcode() below
 * multiplies each by 4 to get the expected byte size of the image).
 * NOTE(review): dump artifact — each line carries a stray original
 * line number and some interleaved lines are missing.
 */
34 #define CIK_PFP_UCODE_SIZE 2144
35 #define CIK_ME_UCODE_SIZE 2144
36 #define CIK_CE_UCODE_SIZE 2144
38 #define CIK_MEC_UCODE_SIZE 4192
40 #define BONAIRE_RLC_UCODE_SIZE 2048
41 #define KB_RLC_UCODE_SIZE 2560
42 #define KV_RLC_UCODE_SIZE 2560
44 #define CIK_MC_UCODE_SIZE 7866
46 #define CIK_SDMA_UCODE_SIZE 1050
47 #define CIK_SDMA_UCODE_VERSION 64
/*
 * Declare every firmware image this driver may request, per CIK chip
 * (Bonaire dGPU; Kaveri/Kabini APUs).  APUs have no MC image — see the
 * RADEON_IS_IGP check in cik_init_microcode().
 */
49 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
62 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
63 MODULE_FIRMWARE("radeon/KABINI_me.bin");
64 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
65 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
66 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
67 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
/* forward declaration; definition is outside this chunk */
69 static void cik_rlc_stop(struct radeon_device *rdev);
72 * Indirect registers accessor
/*
 * cik_pciep_rreg - read a PCIE port register through the indirect
 * index/data pair: write the offset to PCIE_INDEX, read PCIE_INDEX
 * back to post the write, then read the value from PCIE_DATA.
 *
 * NOTE(review): dump artifact — the gaps in the embedded numbering
 * show missing lines (function braces, declaration of 'r', locking if
 * any, and the final 'return r;').  Visible text kept byte-for-byte.
 */
74 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
78 WREG32(PCIE_INDEX, reg);
79 (void)RREG32(PCIE_INDEX);
80 r = RREG32(PCIE_DATA);
/*
 * cik_pciep_wreg - write @v to a PCIE port register via the indirect
 * index/data pair.
 *
 * NOTE(review): dump artifact — original line 88 (presumably
 * 'WREG32(PCIE_DATA, v);') and the braces are missing from this dump;
 * the trailing PCIE_DATA readback posts that write.  Do not treat the
 * visible text as the complete function.
 */
84 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
86 WREG32(PCIE_INDEX, reg);
87 (void)RREG32(PCIE_INDEX);
89 (void)RREG32(PCIE_DATA);
/*
 * Bonaire SPM golden setting; entries appear to be {reg, mask, value}
 * triples consumed by radeon_program_register_sequence() — verify
 * there.  (Array braces lost in this dump.)
 */
92 static const u32 bonaire_golden_spm_registers[] =
94 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Bonaire common golden settings ({reg, mask, value} triples for
 * radeon_program_register_sequence(); braces lost in this dump).
 */
97 static const u32 bonaire_golden_common_registers[] =
99 0xc770, 0xffffffff, 0x00000800,
100 0xc774, 0xffffffff, 0x00000800,
101 0xc798, 0xffffffff, 0x00007fbf,
102 0xc79c, 0xffffffff, 0x00007faf
/*
 * Bonaire per-ASIC golden register settings ({reg, mask, value}
 * triples; braces lost in this dump).  Hardware-team-supplied values —
 * do not edit by hand.
 */
105 static const u32 bonaire_golden_registers[] =
107 0x3354, 0x00000333, 0x00000333,
108 0x3350, 0x000c0fc0, 0x00040200,
109 0x9a10, 0x00010000, 0x00058208,
110 0x3c000, 0xffff1fff, 0x00140000,
111 0x3c200, 0xfdfc0fff, 0x00000100,
112 0x3c234, 0x40000000, 0x40000200,
113 0x9830, 0xffffffff, 0x00000000,
114 0x9834, 0xf00fffff, 0x00000400,
115 0x9838, 0x0002021c, 0x00020200,
116 0xc78, 0x00000080, 0x00000000,
117 0x5bb0, 0x000000f0, 0x00000070,
118 0x5bc0, 0xf0311fff, 0x80300000,
119 0x98f8, 0x73773777, 0x12010001,
120 0x350c, 0x00810000, 0x408af000,
121 0x7030, 0x31000111, 0x00000011,
122 0x2f48, 0x73773777, 0x12010001,
123 0x220c, 0x00007fb6, 0x0021a1b1,
124 0x2210, 0x00007fb6, 0x002021b1,
125 0x2180, 0x00007fb6, 0x00002191,
126 0x2218, 0x00007fb6, 0x002121b1,
127 0x221c, 0x00007fb6, 0x002021b1,
128 0x21dc, 0x00007fb6, 0x00002191,
129 0x21e0, 0x00007fb6, 0x00002191,
130 0x3628, 0x0000003f, 0x0000000a,
131 0x362c, 0x0000003f, 0x0000000a,
132 0x2ae4, 0x00073ffe, 0x000022a2,
133 0x240c, 0x000007ff, 0x00000000,
134 0x8a14, 0xf000003f, 0x00000007,
135 0x8bf0, 0x00002001, 0x00000001,
136 0x8b24, 0xffffffff, 0x00ffffff,
137 0x30a04, 0x0000ff0f, 0x00000000,
138 0x28a4c, 0x07ffffff, 0x06000000,
139 0x4d8, 0x00000fff, 0x00000100,
140 0x3e78, 0x00000001, 0x00000002,
141 0x9100, 0x03000000, 0x0362c688,
142 0x8c00, 0x000000ff, 0x00000001,
143 0xe40, 0x00001fff, 0x00001fff,
144 0x9060, 0x0000007f, 0x00000020,
145 0x9508, 0x00010000, 0x00010000,
146 0xac14, 0x000003ff, 0x000000f3,
147 0xac0c, 0xffffffff, 0x00001032
/*
 * Bonaire medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * sequence ({reg, mask, value} triples; braces lost in this dump).
 */
150 static const u32 bonaire_mgcg_cgcg_init[] =
152 0xc420, 0xffffffff, 0xfffffffc,
153 0x30800, 0xffffffff, 0xe0000000,
154 0x3c2a0, 0xffffffff, 0x00000100,
155 0x3c208, 0xffffffff, 0x00000100,
156 0x3c2c0, 0xffffffff, 0xc0000100,
157 0x3c2c8, 0xffffffff, 0xc0000100,
158 0x3c2c4, 0xffffffff, 0xc0000100,
159 0x55e4, 0xffffffff, 0x00600100,
160 0x3c280, 0xffffffff, 0x00000100,
161 0x3c214, 0xffffffff, 0x06000100,
162 0x3c220, 0xffffffff, 0x00000100,
163 0x3c218, 0xffffffff, 0x06000100,
164 0x3c204, 0xffffffff, 0x00000100,
165 0x3c2e0, 0xffffffff, 0x00000100,
166 0x3c224, 0xffffffff, 0x00000100,
167 0x3c200, 0xffffffff, 0x00000100,
168 0x3c230, 0xffffffff, 0x00000100,
169 0x3c234, 0xffffffff, 0x00000100,
170 0x3c250, 0xffffffff, 0x00000100,
171 0x3c254, 0xffffffff, 0x00000100,
172 0x3c258, 0xffffffff, 0x00000100,
173 0x3c25c, 0xffffffff, 0x00000100,
174 0x3c260, 0xffffffff, 0x00000100,
175 0x3c27c, 0xffffffff, 0x00000100,
176 0x3c278, 0xffffffff, 0x00000100,
177 0x3c210, 0xffffffff, 0x06000100,
178 0x3c290, 0xffffffff, 0x00000100,
179 0x3c274, 0xffffffff, 0x00000100,
180 0x3c2b4, 0xffffffff, 0x00000100,
181 0x3c2b0, 0xffffffff, 0x00000100,
182 0x3c270, 0xffffffff, 0x00000100,
183 0x30800, 0xffffffff, 0xe0000000,
184 0x3c020, 0xffffffff, 0x00010000,
185 0x3c024, 0xffffffff, 0x00030002,
186 0x3c028, 0xffffffff, 0x00040007,
187 0x3c02c, 0xffffffff, 0x00060005,
188 0x3c030, 0xffffffff, 0x00090008,
189 0x3c034, 0xffffffff, 0x00010000,
190 0x3c038, 0xffffffff, 0x00030002,
191 0x3c03c, 0xffffffff, 0x00040007,
192 0x3c040, 0xffffffff, 0x00060005,
193 0x3c044, 0xffffffff, 0x00090008,
194 0x3c048, 0xffffffff, 0x00010000,
195 0x3c04c, 0xffffffff, 0x00030002,
196 0x3c050, 0xffffffff, 0x00040007,
197 0x3c054, 0xffffffff, 0x00060005,
198 0x3c058, 0xffffffff, 0x00090008,
199 0x3c05c, 0xffffffff, 0x00010000,
200 0x3c060, 0xffffffff, 0x00030002,
201 0x3c064, 0xffffffff, 0x00040007,
202 0x3c068, 0xffffffff, 0x00060005,
203 0x3c06c, 0xffffffff, 0x00090008,
204 0x3c070, 0xffffffff, 0x00010000,
205 0x3c074, 0xffffffff, 0x00030002,
206 0x3c078, 0xffffffff, 0x00040007,
207 0x3c07c, 0xffffffff, 0x00060005,
208 0x3c080, 0xffffffff, 0x00090008,
209 0x3c084, 0xffffffff, 0x00010000,
210 0x3c088, 0xffffffff, 0x00030002,
211 0x3c08c, 0xffffffff, 0x00040007,
212 0x3c090, 0xffffffff, 0x00060005,
213 0x3c094, 0xffffffff, 0x00090008,
214 0x3c098, 0xffffffff, 0x00010000,
215 0x3c09c, 0xffffffff, 0x00030002,
216 0x3c0a0, 0xffffffff, 0x00040007,
217 0x3c0a4, 0xffffffff, 0x00060005,
218 0x3c0a8, 0xffffffff, 0x00090008,
219 0x3c000, 0xffffffff, 0x96e00200,
220 0x8708, 0xffffffff, 0x00900100,
221 0xc424, 0xffffffff, 0x0020003f,
222 0x38, 0xffffffff, 0x0140001c,
223 0x3c, 0x000f0000, 0x000f0000,
224 0x220, 0xffffffff, 0xC060000C,
225 0x224, 0xc0000fff, 0x00000100,
226 0xf90, 0xffffffff, 0x00000100,
227 0xf98, 0x00000101, 0x00000000,
228 0x20a8, 0xffffffff, 0x00000104,
229 0x55e4, 0xff000fff, 0x00000100,
230 0x30cc, 0xc0000fff, 0x00000104,
231 0xc1e4, 0x00000001, 0x00000001,
232 0xd00c, 0xff000ff0, 0x00000100,
233 0xd80c, 0xff000ff0, 0x00000100
/*
 * Spectre (Kaveri) SPM golden setting ({reg, mask, value}; braces
 * lost in this dump).
 */
236 static const u32 spectre_golden_spm_registers[] =
238 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Spectre (Kaveri) common golden settings ({reg, mask, value} triples;
 * braces lost in this dump).
 */
241 static const u32 spectre_golden_common_registers[] =
243 0xc770, 0xffffffff, 0x00000800,
244 0xc774, 0xffffffff, 0x00000800,
245 0xc798, 0xffffffff, 0x00007fbf,
246 0xc79c, 0xffffffff, 0x00007faf
/*
 * Spectre (Kaveri) per-ASIC golden register settings ({reg, mask,
 * value} triples; braces lost in this dump).
 */
249 static const u32 spectre_golden_registers[] =
251 0x3c000, 0xffff1fff, 0x96940200,
252 0x3c00c, 0xffff0001, 0xff000000,
253 0x3c200, 0xfffc0fff, 0x00000100,
254 0x6ed8, 0x00010101, 0x00010000,
255 0x9834, 0xf00fffff, 0x00000400,
256 0x9838, 0xfffffffc, 0x00020200,
257 0x5bb0, 0x000000f0, 0x00000070,
258 0x5bc0, 0xf0311fff, 0x80300000,
259 0x98f8, 0x73773777, 0x12010001,
260 0x9b7c, 0x00ff0000, 0x00fc0000,
261 0x2f48, 0x73773777, 0x12010001,
262 0x8a14, 0xf000003f, 0x00000007,
263 0x8b24, 0xffffffff, 0x00ffffff,
264 0x28350, 0x3f3f3fff, 0x00000082,
265 0x28355, 0x0000003f, 0x00000000,
266 0x3e78, 0x00000001, 0x00000002,
267 0x913c, 0xffff03df, 0x00000004,
268 0xc768, 0x00000008, 0x00000008,
269 0x8c00, 0x000008ff, 0x00000800,
270 0x9508, 0x00010000, 0x00010000,
271 0xac0c, 0xffffffff, 0x54763210,
272 0x214f8, 0x01ff01ff, 0x00000002,
273 0x21498, 0x007ff800, 0x00200000,
274 0x2015c, 0xffffffff, 0x00000f40,
275 0x30934, 0xffffffff, 0x00000001
/*
 * Spectre (Kaveri) MGCG/CGCG clock-gating init sequence ({reg, mask,
 * value} triples; braces lost in this dump).
 */
278 static const u32 spectre_mgcg_cgcg_init[] =
280 0xc420, 0xffffffff, 0xfffffffc,
281 0x30800, 0xffffffff, 0xe0000000,
282 0x3c2a0, 0xffffffff, 0x00000100,
283 0x3c208, 0xffffffff, 0x00000100,
284 0x3c2c0, 0xffffffff, 0x00000100,
285 0x3c2c8, 0xffffffff, 0x00000100,
286 0x3c2c4, 0xffffffff, 0x00000100,
287 0x55e4, 0xffffffff, 0x00600100,
288 0x3c280, 0xffffffff, 0x00000100,
289 0x3c214, 0xffffffff, 0x06000100,
290 0x3c220, 0xffffffff, 0x00000100,
291 0x3c218, 0xffffffff, 0x06000100,
292 0x3c204, 0xffffffff, 0x00000100,
293 0x3c2e0, 0xffffffff, 0x00000100,
294 0x3c224, 0xffffffff, 0x00000100,
295 0x3c200, 0xffffffff, 0x00000100,
296 0x3c230, 0xffffffff, 0x00000100,
297 0x3c234, 0xffffffff, 0x00000100,
298 0x3c250, 0xffffffff, 0x00000100,
299 0x3c254, 0xffffffff, 0x00000100,
300 0x3c258, 0xffffffff, 0x00000100,
301 0x3c25c, 0xffffffff, 0x00000100,
302 0x3c260, 0xffffffff, 0x00000100,
303 0x3c27c, 0xffffffff, 0x00000100,
304 0x3c278, 0xffffffff, 0x00000100,
305 0x3c210, 0xffffffff, 0x06000100,
306 0x3c290, 0xffffffff, 0x00000100,
307 0x3c274, 0xffffffff, 0x00000100,
308 0x3c2b4, 0xffffffff, 0x00000100,
309 0x3c2b0, 0xffffffff, 0x00000100,
310 0x3c270, 0xffffffff, 0x00000100,
311 0x30800, 0xffffffff, 0xe0000000,
312 0x3c020, 0xffffffff, 0x00010000,
313 0x3c024, 0xffffffff, 0x00030002,
314 0x3c028, 0xffffffff, 0x00040007,
315 0x3c02c, 0xffffffff, 0x00060005,
316 0x3c030, 0xffffffff, 0x00090008,
317 0x3c034, 0xffffffff, 0x00010000,
318 0x3c038, 0xffffffff, 0x00030002,
319 0x3c03c, 0xffffffff, 0x00040007,
320 0x3c040, 0xffffffff, 0x00060005,
321 0x3c044, 0xffffffff, 0x00090008,
322 0x3c048, 0xffffffff, 0x00010000,
323 0x3c04c, 0xffffffff, 0x00030002,
324 0x3c050, 0xffffffff, 0x00040007,
325 0x3c054, 0xffffffff, 0x00060005,
326 0x3c058, 0xffffffff, 0x00090008,
327 0x3c05c, 0xffffffff, 0x00010000,
328 0x3c060, 0xffffffff, 0x00030002,
329 0x3c064, 0xffffffff, 0x00040007,
330 0x3c068, 0xffffffff, 0x00060005,
331 0x3c06c, 0xffffffff, 0x00090008,
332 0x3c070, 0xffffffff, 0x00010000,
333 0x3c074, 0xffffffff, 0x00030002,
334 0x3c078, 0xffffffff, 0x00040007,
335 0x3c07c, 0xffffffff, 0x00060005,
336 0x3c080, 0xffffffff, 0x00090008,
337 0x3c084, 0xffffffff, 0x00010000,
338 0x3c088, 0xffffffff, 0x00030002,
339 0x3c08c, 0xffffffff, 0x00040007,
340 0x3c090, 0xffffffff, 0x00060005,
341 0x3c094, 0xffffffff, 0x00090008,
342 0x3c098, 0xffffffff, 0x00010000,
343 0x3c09c, 0xffffffff, 0x00030002,
344 0x3c0a0, 0xffffffff, 0x00040007,
345 0x3c0a4, 0xffffffff, 0x00060005,
346 0x3c0a8, 0xffffffff, 0x00090008,
347 0x3c0ac, 0xffffffff, 0x00010000,
348 0x3c0b0, 0xffffffff, 0x00030002,
349 0x3c0b4, 0xffffffff, 0x00040007,
350 0x3c0b8, 0xffffffff, 0x00060005,
351 0x3c0bc, 0xffffffff, 0x00090008,
352 0x3c000, 0xffffffff, 0x96e00200,
353 0x8708, 0xffffffff, 0x00900100,
354 0xc424, 0xffffffff, 0x0020003f,
355 0x38, 0xffffffff, 0x0140001c,
356 0x3c, 0x000f0000, 0x000f0000,
357 0x220, 0xffffffff, 0xC060000C,
358 0x224, 0xc0000fff, 0x00000100,
359 0xf90, 0xffffffff, 0x00000100,
360 0xf98, 0x00000101, 0x00000000,
361 0x20a8, 0xffffffff, 0x00000104,
362 0x55e4, 0xff000fff, 0x00000100,
363 0x30cc, 0xc0000fff, 0x00000104,
364 0xc1e4, 0x00000001, 0x00000001,
365 0xd00c, 0xff000ff0, 0x00000100,
366 0xd80c, 0xff000ff0, 0x00000100
/*
 * Kalindi (Kabini) SPM golden setting ({reg, mask, value}; braces
 * lost in this dump).
 */
369 static const u32 kalindi_golden_spm_registers[] =
371 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Kalindi (Kabini) common golden settings ({reg, mask, value} triples;
 * braces lost in this dump).
 */
374 static const u32 kalindi_golden_common_registers[] =
376 0xc770, 0xffffffff, 0x00000800,
377 0xc774, 0xffffffff, 0x00000800,
378 0xc798, 0xffffffff, 0x00007fbf,
379 0xc79c, 0xffffffff, 0x00007faf
/*
 * Kalindi (Kabini) per-ASIC golden register settings ({reg, mask,
 * value} triples; braces lost in this dump).
 */
382 static const u32 kalindi_golden_registers[] =
384 0x3c000, 0xffffdfff, 0x6e944040,
385 0x55e4, 0xff607fff, 0xfc000100,
386 0x3c220, 0xff000fff, 0x00000100,
387 0x3c224, 0xff000fff, 0x00000100,
388 0x3c200, 0xfffc0fff, 0x00000100,
389 0x6ed8, 0x00010101, 0x00010000,
390 0x9830, 0xffffffff, 0x00000000,
391 0x9834, 0xf00fffff, 0x00000400,
392 0x5bb0, 0x000000f0, 0x00000070,
393 0x5bc0, 0xf0311fff, 0x80300000,
394 0x98f8, 0x73773777, 0x12010001,
395 0x98fc, 0xffffffff, 0x00000010,
396 0x9b7c, 0x00ff0000, 0x00fc0000,
397 0x8030, 0x00001f0f, 0x0000100a,
398 0x2f48, 0x73773777, 0x12010001,
399 0x2408, 0x000fffff, 0x000c007f,
400 0x8a14, 0xf000003f, 0x00000007,
401 0x8b24, 0x3fff3fff, 0x00ffcfff,
402 0x30a04, 0x0000ff0f, 0x00000000,
403 0x28a4c, 0x07ffffff, 0x06000000,
404 0x4d8, 0x00000fff, 0x00000100,
405 0x3e78, 0x00000001, 0x00000002,
406 0xc768, 0x00000008, 0x00000008,
407 0x8c00, 0x000000ff, 0x00000003,
408 0x214f8, 0x01ff01ff, 0x00000002,
409 0x21498, 0x007ff800, 0x00200000,
410 0x2015c, 0xffffffff, 0x00000f40,
411 0x88c4, 0x001f3ae3, 0x00000082,
412 0x88d4, 0x0000001f, 0x00000010,
413 0x30934, 0xffffffff, 0x00000000
/*
 * Kalindi (Kabini) MGCG/CGCG clock-gating init sequence ({reg, mask,
 * value} triples; braces lost in this dump).
 */
416 static const u32 kalindi_mgcg_cgcg_init[] =
418 0xc420, 0xffffffff, 0xfffffffc,
419 0x30800, 0xffffffff, 0xe0000000,
420 0x3c2a0, 0xffffffff, 0x00000100,
421 0x3c208, 0xffffffff, 0x00000100,
422 0x3c2c0, 0xffffffff, 0x00000100,
423 0x3c2c8, 0xffffffff, 0x00000100,
424 0x3c2c4, 0xffffffff, 0x00000100,
425 0x55e4, 0xffffffff, 0x00600100,
426 0x3c280, 0xffffffff, 0x00000100,
427 0x3c214, 0xffffffff, 0x06000100,
428 0x3c220, 0xffffffff, 0x00000100,
429 0x3c218, 0xffffffff, 0x06000100,
430 0x3c204, 0xffffffff, 0x00000100,
431 0x3c2e0, 0xffffffff, 0x00000100,
432 0x3c224, 0xffffffff, 0x00000100,
433 0x3c200, 0xffffffff, 0x00000100,
434 0x3c230, 0xffffffff, 0x00000100,
435 0x3c234, 0xffffffff, 0x00000100,
436 0x3c250, 0xffffffff, 0x00000100,
437 0x3c254, 0xffffffff, 0x00000100,
438 0x3c258, 0xffffffff, 0x00000100,
439 0x3c25c, 0xffffffff, 0x00000100,
440 0x3c260, 0xffffffff, 0x00000100,
441 0x3c27c, 0xffffffff, 0x00000100,
442 0x3c278, 0xffffffff, 0x00000100,
443 0x3c210, 0xffffffff, 0x06000100,
444 0x3c290, 0xffffffff, 0x00000100,
445 0x3c274, 0xffffffff, 0x00000100,
446 0x3c2b4, 0xffffffff, 0x00000100,
447 0x3c2b0, 0xffffffff, 0x00000100,
448 0x3c270, 0xffffffff, 0x00000100,
449 0x30800, 0xffffffff, 0xe0000000,
450 0x3c020, 0xffffffff, 0x00010000,
451 0x3c024, 0xffffffff, 0x00030002,
452 0x3c028, 0xffffffff, 0x00040007,
453 0x3c02c, 0xffffffff, 0x00060005,
454 0x3c030, 0xffffffff, 0x00090008,
455 0x3c034, 0xffffffff, 0x00010000,
456 0x3c038, 0xffffffff, 0x00030002,
457 0x3c03c, 0xffffffff, 0x00040007,
458 0x3c040, 0xffffffff, 0x00060005,
459 0x3c044, 0xffffffff, 0x00090008,
460 0x3c000, 0xffffffff, 0x96e00200,
461 0x8708, 0xffffffff, 0x00900100,
462 0xc424, 0xffffffff, 0x0020003f,
463 0x38, 0xffffffff, 0x0140001c,
464 0x3c, 0x000f0000, 0x000f0000,
465 0x220, 0xffffffff, 0xC060000C,
466 0x224, 0xc0000fff, 0x00000100,
467 0x20a8, 0xffffffff, 0x00000104,
468 0x55e4, 0xff000fff, 0x00000100,
469 0x30cc, 0xc0000fff, 0x00000104,
470 0xc1e4, 0x00000001, 0x00000001,
471 0xd00c, 0xff000ff0, 0x00000100,
472 0xd80c, 0xff000ff0, 0x00000100
/*
 * cik_init_golden_registers - program the per-family golden register
 * tables above into the hardware via radeon_program_register_sequence().
 *
 * NOTE(review): dump artifact — the 'case CHIP_*:' labels, 'break;'
 * statements and braces are missing (see gaps in the embedded
 * numbering).  The three branches below presumably correspond to
 * Bonaire, Kabini (kalindi) and Kaveri (spectre) — verify against the
 * full source.  Visible text kept byte-for-byte.
 */
475 static void cik_init_golden_registers(struct radeon_device *rdev)
477 switch (rdev->family) {
/* presumably 'case CHIP_BONAIRE:' (label lost in dump) */
479 radeon_program_register_sequence(rdev,
480 bonaire_mgcg_cgcg_init,
481 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
482 radeon_program_register_sequence(rdev,
483 bonaire_golden_registers,
484 (const u32)ARRAY_SIZE(bonaire_golden_registers));
485 radeon_program_register_sequence(rdev,
486 bonaire_golden_common_registers,
487 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
488 radeon_program_register_sequence(rdev,
489 bonaire_golden_spm_registers,
490 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
/* presumably 'case CHIP_KABINI:' (label lost in dump) */
493 radeon_program_register_sequence(rdev,
494 kalindi_mgcg_cgcg_init,
495 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
496 radeon_program_register_sequence(rdev,
497 kalindi_golden_registers,
498 (const u32)ARRAY_SIZE(kalindi_golden_registers));
499 radeon_program_register_sequence(rdev,
500 kalindi_golden_common_registers,
501 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
502 radeon_program_register_sequence(rdev,
503 kalindi_golden_spm_registers,
504 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
/* presumably 'case CHIP_KAVERI:' (label lost in dump) */
507 radeon_program_register_sequence(rdev,
508 spectre_mgcg_cgcg_init,
509 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
510 radeon_program_register_sequence(rdev,
511 spectre_golden_registers,
512 (const u32)ARRAY_SIZE(spectre_golden_registers));
513 radeon_program_register_sequence(rdev,
514 spectre_golden_common_registers,
515 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
516 radeon_program_register_sequence(rdev,
517 spectre_golden_spm_registers,
518 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
526 * cik_get_xclk - get the xclk
528 * @rdev: radeon_device pointer
530 * Returns the reference clock used by the gfx engine
/*
 * Returns the SPLL reference clock, halved on IGPs when GPU_COUNTER_CLK
 * is set in GENERAL_PWRMGT, and quartered (on the other path) when
 * XTALIN_DIVIDE is set in CG_CLKPIN_CNTL.
 *
 * NOTE(review): dump artifact — braces and an '} else {' between the
 * two RREG32_SMC checks (original line 540) are missing from this dump;
 * visible text kept byte-for-byte.
 */
533 u32 cik_get_xclk(struct radeon_device *rdev)
535 u32 reference_clock = rdev->clock.spll.reference_freq;
537 if (rdev->flags & RADEON_IS_IGP) {
538 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
539 return reference_clock / 2;
541 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
542 return reference_clock / 4;
544 return reference_clock;
548 * cik_mm_rdoorbell - read a doorbell dword
550 * @rdev: radeon_device pointer
551 * @offset: byte offset into the aperture
553 * Returns the value in the doorbell aperture at the
554 * requested offset (CIK).
/*
 * cik_mm_rdoorbell - read a dword from the doorbell aperture at the
 * given byte offset, after bounds-checking it against the aperture
 * size; out-of-range reads log a DRM error.
 *
 * NOTE(review): dump artifact — the '} else {', the out-of-range
 * 'return 0;' and braces are missing from this dump; visible text
 * kept byte-for-byte.
 */
556 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
558 if (offset < rdev->doorbell.size) {
559 return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
561 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
567 * cik_mm_wdoorbell - write a doorbell dword
569 * @rdev: radeon_device pointer
570 * @offset: byte offset into the aperture
573 * Writes @v to the doorbell aperture at the
574 * requested offset (CIK).
/*
 * cik_mm_wdoorbell - write dword @v to the doorbell aperture at the
 * given byte offset, after bounds-checking it against the aperture
 * size; out-of-range writes are dropped with a DRM error.
 *
 * NOTE(review): dump artifact — the '} else {' and braces are missing
 * from this dump; visible text kept byte-for-byte.
 */
576 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
578 if (offset < rdev->doorbell.size) {
579 writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
581 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
/*
 * MC (memory controller) IO debug register programming for Bonaire:
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written by
 * ci_mc_load_microcode() before the MC ucode upload.
 * (Array braces lost in this dump.)
 */
585 #define BONAIRE_IO_MC_REGS_SIZE 36
587 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
589 {0x00000070, 0x04400000},
590 {0x00000071, 0x80c01803},
591 {0x00000072, 0x00004004},
592 {0x00000073, 0x00000100},
593 {0x00000074, 0x00ff0000},
594 {0x00000075, 0x34000000},
595 {0x00000076, 0x08000014},
596 {0x00000077, 0x00cc08ec},
597 {0x00000078, 0x00000400},
598 {0x00000079, 0x00000000},
599 {0x0000007a, 0x04090000},
600 {0x0000007c, 0x00000000},
601 {0x0000007e, 0x4408a8e8},
602 {0x0000007f, 0x00000304},
603 {0x00000080, 0x00000000},
604 {0x00000082, 0x00000001},
605 {0x00000083, 0x00000002},
606 {0x00000084, 0xf3e4f400},
607 {0x00000085, 0x052024e3},
608 {0x00000087, 0x00000000},
609 {0x00000088, 0x01000000},
610 {0x0000008a, 0x1c0a0000},
611 {0x0000008b, 0xff010000},
612 {0x0000008d, 0xffffefff},
613 {0x0000008e, 0xfff3efff},
614 {0x0000008f, 0xfff3efbf},
615 {0x00000092, 0xf7ffffff},
616 {0x00000093, 0xffffff7f},
617 {0x00000095, 0x00101101},
618 {0x00000096, 0x00000fff},
619 {0x00000097, 0x00116fff},
620 {0x00000098, 0x60010000},
621 {0x00000099, 0x10010000},
622 {0x0000009a, 0x00006000},
623 {0x0000009b, 0x00001000},
624 {0x0000009f, 0x00b48000}
628 * cik_srbm_select - select specific register instances
630 * @rdev: radeon_device pointer
631 * @me: selected ME (micro engine)
636 * Switches the currently active registers instances. Some
637 * registers are instanced per VMID, others are instanced per
638 * me/pipe/queue combination.
/*
 * cik_srbm_select - select which register instance (per-VMID or per
 * me/pipe/queue) subsequent SRBM register accesses address, by writing
 * SRBM_GFX_CNTL.
 *
 * NOTE(review): dump artifact — original lines 644-645 (presumably the
 * 'MEID(me & 0x3) |' and 'VMID(vmid) |' terms of the expression, plus
 * braces) are missing; the visible text is kept byte-for-byte and the
 * 'me'/'vmid' parameters ARE consumed in the missing lines.
 */
640 static void cik_srbm_select(struct radeon_device *rdev,
641 u32 me, u32 pipe, u32 queue, u32 vmid)
643 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
646 QUEUEID(queue & 0x7));
647 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
652 * ci_mc_load_microcode - load MC ucode into the hw
654 * @rdev: radeon_device pointer
656 * Load the GDDR MC ucode into the hw (CIK).
657 * Returns 0 on success, error on failure.
/*
 * ci_mc_load_microcode - upload the GDDR memory-controller ucode.
 *
 * Sequence: if the MC sequencer is not already running, optionally
 * black out MC clients, reset the sequencer and make it writable,
 * program the per-board IO debug register pairs, stream the big-endian
 * ucode words into MC_SEQ_SUP_PGM, restart the sequencer, then poll
 * for D0/D1 training completion before restoring blackout state.
 *
 * NOTE(review): dump artifact — braces, 'break;'s, the 'case'/'default'
 * labels of the family switch, early 'return' paths, the 'u32 *io_mc_regs'
 * declaration and the final 'return 0;' are missing (see numbering
 * gaps).  Visible text kept byte-for-byte.
 */
659 static __unused int ci_mc_load_microcode(struct radeon_device *rdev)
661 const __be32 *fw_data;
662 u32 running, blackout = 0;
664 int i, ucode_size, regs_size;
669 switch (rdev->family) {
/* presumably the CHIP_BONAIRE / default arm (label lost in dump) */
672 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
673 ucode_size = CIK_MC_UCODE_SIZE;
674 regs_size = BONAIRE_IO_MC_REGS_SIZE;
678 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
/* blackout MC clients while the sequencer is being reprogrammed */
682 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
683 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
686 /* reset the engine and set to writable */
687 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
688 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
690 /* load mc io regs */
691 for (i = 0; i < regs_size; i++) {
692 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
693 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
695 /* load the MC ucode */
696 fw_data = (const __be32 *)rdev->mc_fw->data;
697 for (i = 0; i < ucode_size; i++)
698 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
700 /* put the engine back into the active state */
701 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
702 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
703 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
705 /* wait for training to complete */
706 for (i = 0; i < rdev->usec_timeout; i++) {
707 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
711 for (i = 0; i < rdev->usec_timeout; i++) {
712 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
/* restore the pre-upload blackout setting */
718 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
725 * cik_init_microcode - load ucode images from disk
727 * @rdev: radeon_device pointer
729 * Use the firmware interface to load the ucode images into
730 * the driver (not loaded into hw).
731 * Returns 0 on success, error on failure.
/*
 * cik_init_microcode - fetch all CIK firmware images from the
 * firmware interface into the driver (they are uploaded to hardware
 * later).  For each image (pfp, me, ce, mec, rlc, sdma, and mc on
 * dGPUs only) the image is requested by name and its size is checked
 * against the expected per-family size; on any failure every image
 * already acquired is released and an error is returned.
 *
 * Uses ksnprintf() and a "radeonkmsfw_%s_*" naming scheme, i.e. the
 * DragonFly BSD firmware(9)-style port of this driver.
 *
 * NOTE(review): dump artifact — the 'case CHIP_*:' labels, 'break;'s,
 * braces, the 'char fw_name[30]' / 'int err' declarations, the
 * 'goto out'/'err = -EINVAL' statements after each size check, and
 * the 'out:' error-unwind label with its intervening 'rdev->*_fw =
 * NULL;' assignments are missing (see numbering gaps).  Visible text
 * kept byte-for-byte.
 */
733 static int cik_init_microcode(struct radeon_device *rdev)
735 const char *chip_name;
736 size_t pfp_req_size, me_req_size, ce_req_size,
737 mec_req_size, rlc_req_size, mc_req_size,
/* per-family expected image sizes, in bytes (word counts * 4) */
744 switch (rdev->family) {
/* presumably 'case CHIP_BONAIRE:' (label lost in dump) */
746 chip_name = "BONAIRE";
747 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
748 me_req_size = CIK_ME_UCODE_SIZE * 4;
749 ce_req_size = CIK_CE_UCODE_SIZE * 4;
750 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
751 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
752 mc_req_size = CIK_MC_UCODE_SIZE * 4;
753 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
/* presumably 'case CHIP_KAVERI:' (label lost in dump); no mc image on APUs */
756 chip_name = "KAVERI";
757 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
758 me_req_size = CIK_ME_UCODE_SIZE * 4;
759 ce_req_size = CIK_CE_UCODE_SIZE * 4;
760 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
761 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
762 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
/* presumably 'case CHIP_KABINI:' (label lost in dump); no mc image on APUs */
765 chip_name = "KABINI";
766 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767 me_req_size = CIK_ME_UCODE_SIZE * 4;
768 ce_req_size = CIK_CE_UCODE_SIZE * 4;
769 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
771 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
776 DRM_INFO("Loading %s Microcode\n", chip_name);
/* PFP (prefetch parser) microcode */
778 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
779 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
782 if (rdev->pfp_fw->datasize != pfp_req_size) {
784 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
785 rdev->pfp_fw->datasize, fw_name);
/* ME (micro engine) microcode */
790 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
791 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
794 if (rdev->me_fw->datasize != me_req_size) {
796 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
797 rdev->me_fw->datasize, fw_name);
/* CE (constant engine) microcode */
801 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
802 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
805 if (rdev->ce_fw->datasize != ce_req_size) {
807 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
808 rdev->ce_fw->datasize, fw_name);
/* MEC (compute micro engine) microcode */
812 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
813 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
816 if (rdev->mec_fw->datasize != mec_req_size) {
818 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
819 rdev->mec_fw->datasize, fw_name);
/* RLC (run list controller) microcode */
823 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
824 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
827 if (rdev->rlc_fw->datasize != rlc_req_size) {
829 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
830 rdev->rlc_fw->datasize, fw_name);
/* SDMA (system DMA) microcode */
834 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
835 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
838 if (rdev->sdma_fw->datasize != sdma_req_size) {
840 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
841 rdev->sdma_fw->datasize, fw_name);
845 /* No MC ucode on APUs */
846 if (!(rdev->flags & RADEON_IS_IGP)) {
847 ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
848 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
851 if (rdev->mc_fw->datasize != mc_req_size) {
853 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
854 rdev->mc_fw->datasize, fw_name);
/* error unwind: release everything acquired so far ('out:' label lost in dump) */
863 "cik_cp: Failed to load firmware \"%s\"\n",
865 release_firmware(rdev->pfp_fw);
867 release_firmware(rdev->me_fw);
869 release_firmware(rdev->ce_fw);
871 release_firmware(rdev->mec_fw);
873 release_firmware(rdev->rlc_fw);
875 release_firmware(rdev->sdma_fw);
876 rdev->sdma_fw = NULL;
877 release_firmware(rdev->mc_fw);
887 * cik_tiling_mode_table_init - init the hw tiling table
889 * @rdev: radeon_device pointer
891 * Starting with SI, the tiling setup is done globally in a
892 * set of 32 tiling modes. Rather than selecting each set of
893 * parameters per surface as on older asics, we just select
894 * which index in the tiling table we want to use, and the
895 * surface uses those parameters (CIK).
897 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
899 const u32 num_tile_mode_states = 32;
900 const u32 num_secondary_tile_mode_states = 16;
901 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
902 u32 num_pipe_configs;
903 u32 num_rbs = rdev->config.cik.max_backends_per_se *
904 rdev->config.cik.max_shader_engines;
906 switch (rdev->config.cik.mem_row_size_in_kb) {
908 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
912 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
915 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
919 num_pipe_configs = rdev->config.cik.max_tile_pipes;
920 if (num_pipe_configs > 8)
921 num_pipe_configs = 8; /* ??? */
923 if (num_pipe_configs == 8) {
924 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
925 switch (reg_offset) {
927 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
933 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
934 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
935 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
936 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
939 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
942 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
945 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
946 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
947 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
948 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
951 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
954 TILE_SPLIT(split_equal_to_row_size));
957 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
961 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
967 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 TILE_SPLIT(split_equal_to_row_size));
973 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
977 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
981 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
982 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
987 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
988 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
989 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
993 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
999 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1000 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1009 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1011 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1015 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1022 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1025 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1026 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1032 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1038 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1047 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1049 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1050 switch (reg_offset) {
1052 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1055 NUM_BANKS(ADDR_SURF_16_BANK));
1058 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1061 NUM_BANKS(ADDR_SURF_16_BANK));
1064 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1067 NUM_BANKS(ADDR_SURF_16_BANK));
1070 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1071 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1072 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1073 NUM_BANKS(ADDR_SURF_16_BANK));
1076 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1079 NUM_BANKS(ADDR_SURF_8_BANK));
1082 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1085 NUM_BANKS(ADDR_SURF_4_BANK));
1088 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1091 NUM_BANKS(ADDR_SURF_2_BANK));
1094 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1097 NUM_BANKS(ADDR_SURF_16_BANK));
1100 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1101 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1102 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1103 NUM_BANKS(ADDR_SURF_16_BANK));
1106 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1109 NUM_BANKS(ADDR_SURF_16_BANK));
1112 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1115 NUM_BANKS(ADDR_SURF_16_BANK));
1118 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1121 NUM_BANKS(ADDR_SURF_8_BANK));
1124 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1127 NUM_BANKS(ADDR_SURF_4_BANK));
1130 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1133 NUM_BANKS(ADDR_SURF_2_BANK));
1139 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1141 } else if (num_pipe_configs == 4) {
1143 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1144 switch (reg_offset) {
1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1148 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1152 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1154 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1160 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1165 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1166 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1170 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1172 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1173 TILE_SPLIT(split_equal_to_row_size));
1176 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1177 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1186 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1188 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189 TILE_SPLIT(split_equal_to_row_size));
1192 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1196 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1200 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1201 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1202 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1206 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1207 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1208 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1212 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1214 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1218 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1219 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1222 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1224 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1228 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1230 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1234 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1235 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1236 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1240 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1245 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1246 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1250 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1252 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1256 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1258 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1266 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1268 } else if (num_rbs < 4) {
1269 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1270 switch (reg_offset) {
1272 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1274 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1278 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1280 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1284 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1286 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1287 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1290 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1292 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1296 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1298 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1299 TILE_SPLIT(split_equal_to_row_size));
1302 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1306 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1312 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315 TILE_SPLIT(split_equal_to_row_size));
1318 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1322 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1326 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1328 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1334 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1338 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1340 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1344 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1350 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1354 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1355 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1356 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1360 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1362 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1372 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1377 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1378 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1382 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1384 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1392 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1395 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1396 switch (reg_offset) {
1398 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1401 NUM_BANKS(ADDR_SURF_16_BANK));
1404 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1407 NUM_BANKS(ADDR_SURF_16_BANK));
1410 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1413 NUM_BANKS(ADDR_SURF_16_BANK));
1416 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1419 NUM_BANKS(ADDR_SURF_16_BANK));
1422 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1425 NUM_BANKS(ADDR_SURF_16_BANK));
1428 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1431 NUM_BANKS(ADDR_SURF_8_BANK));
1434 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1437 NUM_BANKS(ADDR_SURF_4_BANK));
1440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1443 NUM_BANKS(ADDR_SURF_16_BANK));
1446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1449 NUM_BANKS(ADDR_SURF_16_BANK));
1452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1455 NUM_BANKS(ADDR_SURF_16_BANK));
1458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1461 NUM_BANKS(ADDR_SURF_16_BANK));
1464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1467 NUM_BANKS(ADDR_SURF_16_BANK));
1470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1473 NUM_BANKS(ADDR_SURF_8_BANK));
1476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1479 NUM_BANKS(ADDR_SURF_4_BANK));
1485 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1487 } else if (num_pipe_configs == 2) {
1488 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1489 switch (reg_offset) {
1491 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1492 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1493 PIPE_CONFIG(ADDR_SURF_P2) |
1494 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1497 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1498 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1503 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1504 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1505 PIPE_CONFIG(ADDR_SURF_P2) |
1506 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1509 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1510 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1511 PIPE_CONFIG(ADDR_SURF_P2) |
1512 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1515 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1517 PIPE_CONFIG(ADDR_SURF_P2) |
1518 TILE_SPLIT(split_equal_to_row_size));
1521 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1525 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527 PIPE_CONFIG(ADDR_SURF_P2) |
1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1531 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533 PIPE_CONFIG(ADDR_SURF_P2) |
1534 TILE_SPLIT(split_equal_to_row_size));
1537 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1541 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1544 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1545 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1546 PIPE_CONFIG(ADDR_SURF_P2) |
1547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1551 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1552 PIPE_CONFIG(ADDR_SURF_P2) |
1553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1556 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1558 PIPE_CONFIG(ADDR_SURF_P2) |
1559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1562 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1566 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1568 PIPE_CONFIG(ADDR_SURF_P2) |
1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1572 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1574 PIPE_CONFIG(ADDR_SURF_P2) |
1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1578 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1580 PIPE_CONFIG(ADDR_SURF_P2) |
1581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1584 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1585 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1588 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1589 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1590 PIPE_CONFIG(ADDR_SURF_P2) |
1591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1594 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1595 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1596 PIPE_CONFIG(ADDR_SURF_P2) |
1597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1600 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1601 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1602 PIPE_CONFIG(ADDR_SURF_P2) |
1603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1610 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1612 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1613 switch (reg_offset) {
1615 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1616 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1617 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1618 NUM_BANKS(ADDR_SURF_16_BANK));
1621 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1624 NUM_BANKS(ADDR_SURF_16_BANK));
1627 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1630 NUM_BANKS(ADDR_SURF_16_BANK));
1633 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1636 NUM_BANKS(ADDR_SURF_16_BANK));
1639 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1642 NUM_BANKS(ADDR_SURF_16_BANK));
1645 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1648 NUM_BANKS(ADDR_SURF_16_BANK));
1651 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1654 NUM_BANKS(ADDR_SURF_8_BANK));
1657 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1660 NUM_BANKS(ADDR_SURF_16_BANK));
1663 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1666 NUM_BANKS(ADDR_SURF_16_BANK));
1669 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1672 NUM_BANKS(ADDR_SURF_16_BANK));
1675 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1678 NUM_BANKS(ADDR_SURF_16_BANK));
1681 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1684 NUM_BANKS(ADDR_SURF_16_BANK));
1687 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1690 NUM_BANKS(ADDR_SURF_16_BANK));
1693 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1696 NUM_BANKS(ADDR_SURF_8_BANK));
1702 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1705 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1709 * cik_select_se_sh - select which SE, SH to address
1711 * @rdev: radeon_device pointer
1712 * @se_num: shader engine to address
1713 * @sh_num: sh block to address
1715 * Select which SE, SH combinations to address. Certain
1716 * registers are instanced per SE or SH. 0xffffffff means
1717 * broadcast to all SEs or SHs (CIK).
1719 static void cik_select_se_sh(struct radeon_device *rdev,
1720 u32 se_num, u32 sh_num)
1722 u32 data = INSTANCE_BROADCAST_WRITES;
1724 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1725 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1726 else if (se_num == 0xffffffff)
1727 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1728 else if (sh_num == 0xffffffff)
1729 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1731 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1732 WREG32(GRBM_GFX_INDEX, data);
1736 * cik_create_bitmask - create a bitmask
1738 * @bit_width: length of the mask
1740 * create a variable length bit mask (CIK).
1741 * Returns the bitmask.
1743 static u32 cik_create_bitmask(u32 bit_width)
1747 for (i = 0; i < bit_width; i++) {
 * cik_get_rb_disabled - get the mask of disabled RBs
1757 * @rdev: radeon_device pointer
1758 * @max_rb_num: max RBs (render backends) for the asic
1759 * @se_num: number of SEs (shader engines) for the asic
1760 * @sh_per_se: number of SH blocks per SE for the asic
1762 * Calculates the bitmask of disabled RBs (CIK).
1763 * Returns the disabled RB bitmask.
1765 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1766 u32 max_rb_num, u32 se_num,
1771 data = RREG32(CC_RB_BACKEND_DISABLE);
1773 data &= BACKEND_DISABLE_MASK;
1776 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1778 data >>= BACKEND_DISABLE_SHIFT;
1780 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786 * cik_setup_rb - setup the RBs on the asic
1788 * @rdev: radeon_device pointer
1789 * @se_num: number of SEs (shader engines) for the asic
1790 * @sh_per_se: number of SH blocks per SE for the asic
1791 * @max_rb_num: max RBs (render backends) for the asic
1793 * Configures per-SE/SH RB registers (CIK).
1795 static void cik_setup_rb(struct radeon_device *rdev,
1796 u32 se_num, u32 sh_per_se,
1801 u32 disabled_rbs = 0;
1802 u32 enabled_rbs = 0;
1804 for (i = 0; i < se_num; i++) {
1805 for (j = 0; j < sh_per_se; j++) {
1806 cik_select_se_sh(rdev, i, j);
1807 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1808 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1811 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1814 for (i = 0; i < max_rb_num; i++) {
1815 if (!(disabled_rbs & mask))
1816 enabled_rbs |= mask;
1820 for (i = 0; i < se_num; i++) {
1821 cik_select_se_sh(rdev, i, 0xffffffff);
1823 for (j = 0; j < sh_per_se; j++) {
1824 switch (enabled_rbs & 3) {
1826 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1829 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1833 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1838 WREG32(PA_SC_RASTER_CONFIG, data);
1840 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1844 * cik_gpu_init - setup the 3D engine
1846 * @rdev: radeon_device pointer
1848 * Configures the 3D engine and tiling configuration
1849 * registers so that the 3D engine is usable.
1851 static __unused void cik_gpu_init(struct radeon_device *rdev)
1853 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1854 u32 mc_shared_chmap, mc_arb_ramcfg;
1855 u32 hdp_host_path_cntl;
1859 switch (rdev->family) {
1861 rdev->config.cik.max_shader_engines = 2;
1862 rdev->config.cik.max_tile_pipes = 4;
1863 rdev->config.cik.max_cu_per_sh = 7;
1864 rdev->config.cik.max_sh_per_se = 1;
1865 rdev->config.cik.max_backends_per_se = 2;
1866 rdev->config.cik.max_texture_channel_caches = 4;
1867 rdev->config.cik.max_gprs = 256;
1868 rdev->config.cik.max_gs_threads = 32;
1869 rdev->config.cik.max_hw_contexts = 8;
1871 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1872 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1873 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1874 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1875 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1882 rdev->config.cik.max_shader_engines = 1;
1883 rdev->config.cik.max_tile_pipes = 2;
1884 rdev->config.cik.max_cu_per_sh = 2;
1885 rdev->config.cik.max_sh_per_se = 1;
1886 rdev->config.cik.max_backends_per_se = 1;
1887 rdev->config.cik.max_texture_channel_caches = 2;
1888 rdev->config.cik.max_gprs = 256;
1889 rdev->config.cik.max_gs_threads = 16;
1890 rdev->config.cik.max_hw_contexts = 8;
1892 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1893 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1894 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1895 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1896 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1900 /* Initialize HDP */
1901 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1902 WREG32((0x2c14 + j), 0x00000000);
1903 WREG32((0x2c18 + j), 0x00000000);
1904 WREG32((0x2c1c + j), 0x00000000);
1905 WREG32((0x2c20 + j), 0x00000000);
1906 WREG32((0x2c24 + j), 0x00000000);
1909 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1911 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1913 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1914 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1916 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1917 rdev->config.cik.mem_max_burst_length_bytes = 256;
1918 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1919 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1920 if (rdev->config.cik.mem_row_size_in_kb > 4)
1921 rdev->config.cik.mem_row_size_in_kb = 4;
1922 /* XXX use MC settings? */
1923 rdev->config.cik.shader_engine_tile_size = 32;
1924 rdev->config.cik.num_gpus = 1;
1925 rdev->config.cik.multi_gpu_tile_size = 64;
1927 /* fix up row size */
1928 gb_addr_config &= ~ROW_SIZE_MASK;
1929 switch (rdev->config.cik.mem_row_size_in_kb) {
1932 gb_addr_config |= ROW_SIZE(0);
1935 gb_addr_config |= ROW_SIZE(1);
1938 gb_addr_config |= ROW_SIZE(2);
1942 /* setup tiling info dword. gb_addr_config is not adequate since it does
1943 * not have bank info, so create a custom tiling dword.
1944 * bits 3:0 num_pipes
1945 * bits 7:4 num_banks
1946 * bits 11:8 group_size
1947 * bits 15:12 row_size
1949 rdev->config.cik.tile_config = 0;
1950 switch (rdev->config.cik.num_tile_pipes) {
1952 rdev->config.cik.tile_config |= (0 << 0);
1955 rdev->config.cik.tile_config |= (1 << 0);
1958 rdev->config.cik.tile_config |= (2 << 0);
1962 /* XXX what about 12? */
1963 rdev->config.cik.tile_config |= (3 << 0);
1966 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1967 rdev->config.cik.tile_config |= 1 << 4;
1969 rdev->config.cik.tile_config |= 0 << 4;
1970 rdev->config.cik.tile_config |=
1971 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1972 rdev->config.cik.tile_config |=
1973 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1975 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1976 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1977 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1978 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1979 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1980 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1981 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1982 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1984 cik_tiling_mode_table_init(rdev);
1986 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1987 rdev->config.cik.max_sh_per_se,
1988 rdev->config.cik.max_backends_per_se);
1990 /* set HW defaults for 3D engine */
1991 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1993 WREG32(SX_DEBUG_1, 0x20);
1995 WREG32(TA_CNTL_AUX, 0x00010000);
1997 tmp = RREG32(SPI_CONFIG_CNTL);
1999 WREG32(SPI_CONFIG_CNTL, tmp);
2001 WREG32(SQ_CONFIG, 1);
2003 WREG32(DB_DEBUG, 0);
2005 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2007 WREG32(DB_DEBUG2, tmp);
2009 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2011 WREG32(DB_DEBUG3, tmp);
2013 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2015 WREG32(CB_HW_CONTROL, tmp);
2017 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2019 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2020 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2021 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2022 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2024 WREG32(VGT_NUM_INSTANCES, 1);
2026 WREG32(CP_PERFMON_CNTL, 0);
2028 WREG32(SQ_CONFIG, 0);
2030 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2031 FORCE_EOV_MAX_REZ_CNT(255)));
2033 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2034 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2036 WREG32(VGT_GS_VERTEX_REUSE, 16);
2037 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2039 tmp = RREG32(HDP_MISC_CNTL);
2040 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2041 WREG32(HDP_MISC_CNTL, tmp);
2043 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2044 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2046 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2047 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053 * GPU scratch registers helpers function.
2056 * cik_scratch_init - setup driver info for CP scratch regs
2058 * @rdev: radeon_device pointer
2060 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
2062 * is not used by default on newer asics (r6xx+). On newer asics,
2063 * memory buffers are used for fences rather than scratch regs.
2065 static __unused void cik_scratch_init(struct radeon_device *rdev)
2069 rdev->scratch.num_reg = 7;
2070 rdev->scratch.reg_base = SCRATCH_REG0;
2071 for (i = 0; i < rdev->scratch.num_reg; i++) {
2072 rdev->scratch.free[i] = true;
2073 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2078 * cik_ring_test - basic gfx ring test
2080 * @rdev: radeon_device pointer
2081 * @ring: radeon_ring structure holding ring information
2083 * Allocate a scratch register and write to it using the gfx ring (CIK).
2084 * Provides a basic gfx ring test to verify that the ring is working.
2085 * Used by cik_cp_gfx_resume();
2086 * Returns 0 on success, error on failure.
2088 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2095 r = radeon_scratch_get(rdev, &scratch);
2097 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2100 WREG32(scratch, 0xCAFEDEAD);
2101 r = radeon_ring_lock(rdev, ring, 3);
2103 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2104 radeon_scratch_free(rdev, scratch);
2107 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2108 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2109 radeon_ring_write(ring, 0xDEADBEEF);
2110 radeon_ring_unlock_commit(rdev, ring);
2112 for (i = 0; i < rdev->usec_timeout; i++) {
2113 tmp = RREG32(scratch);
2114 if (tmp == 0xDEADBEEF)
2118 if (i < rdev->usec_timeout) {
2119 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2121 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2122 ring->idx, scratch, tmp);
2125 radeon_scratch_free(rdev, scratch);
2130 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2132 * @rdev: radeon_device pointer
2133 * @fence: radeon fence object
 * Emits a fence sequence number on the gfx ring and flushes
2138 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2139 struct radeon_fence *fence)
2141 struct radeon_ring *ring = &rdev->ring[fence->ring];
2142 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2144 /* EVENT_WRITE_EOP - flush caches, send int */
2145 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2146 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2148 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2150 radeon_ring_write(ring, addr & 0xfffffffc);
2151 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2152 radeon_ring_write(ring, fence->seq);
2153 radeon_ring_write(ring, 0);
2155 /* We should be using the new WAIT_REG_MEM special op packet here
2156 * but it causes the CP to hang
2158 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2159 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2160 WRITE_DATA_DST_SEL(0)));
2161 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2162 radeon_ring_write(ring, 0);
2163 radeon_ring_write(ring, 0);
2167 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2169 * @rdev: radeon_device pointer
2170 * @fence: radeon fence object
2172 * Emits a fence sequence number on the compute ring and flushes
/* NOTE(review): fragmented extract — compute rings use RELEASE_MEM rather
 * than EVENT_WRITE_EOP, but the packet layout differs: the data/int select
 * dword precedes the address here.
 */
2175 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2176 struct radeon_fence *fence)
2178 struct radeon_ring *ring = &rdev->ring[fence->ring];
2179 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2181 /* RELEASE_MEM - flush caches, send int */
2182 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2183 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2185 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL(1)=write 32-bit fence value, INT_SEL(2)=interrupt on write */
2187 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2188 radeon_ring_write(ring, addr & 0xfffffffc);
2189 radeon_ring_write(ring, upper_32_bits(addr));
2190 radeon_ring_write(ring, fence->seq);
2191 radeon_ring_write(ring, 0);
2193 /* We should be using the new WAIT_REG_MEM special op packet here
2194 * but it causes the CP to hang
/* flush HDP with a plain register write (same workaround as gfx ring) */
2196 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2197 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2198 WRITE_DATA_DST_SEL(0)));
2199 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2200 radeon_ring_write(ring, 0);
2201 radeon_ring_write(ring, 0);
/* Emit a MEM_SEMAPHORE packet that either signals or waits on the
 * semaphore at semaphore->gpu_addr, selected by emit_wait.
 */
2204 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2205 struct radeon_ring *ring,
2206 struct radeon_semaphore *semaphore,
2209 uint64_t addr = semaphore->gpu_addr;
2210 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2212 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2213 radeon_ring_write(ring, addr & 0xffffffff);
/* high 16 address bits share a dword with the wait/signal select */
2214 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2221 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2223 * @rdev: radeon_device pointer
2224 * @ib: radeon indirect buffer object
2226 * Emits an DE (drawing engine) or CE (constant engine) IB
2227 * on the gfx ring. IBs are usually generated by userspace
2228 * acceleration drivers and submitted to the kernel for
2229 * scheduling on the ring. This function schedules the IB
2230 * on the gfx ring for execution by the GPU.
/* NOTE(review): fragmented extract — the else-branches and the
 * declaration of next_rptr are not visible here.
 */
2232 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2234 struct radeon_ring *ring = &rdev->ring[ib->ring];
2235 u32 header, control = INDIRECT_BUFFER_VALID;
2237 if (ib->is_const_ib) {
2238 /* set switch buffer packet before const IB */
2239 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2240 radeon_ring_write(ring, 0);
2242 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
/* record the rptr the CP will have after this IB packet so a hung ring
 * can be inspected; via scratch reg if available, else via writeback */
2245 if (ring->rptr_save_reg) {
2246 next_rptr = ring->wptr + 3 + 4;
2247 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2248 radeon_ring_write(ring, ((ring->rptr_save_reg -
2249 PACKET3_SET_UCONFIG_REG_START) >> 2));
2250 radeon_ring_write(ring, next_rptr);
2251 } else if (rdev->wb.enabled) {
2252 next_rptr = ring->wptr + 5 + 4;
2253 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2254 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2255 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2256 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2257 radeon_ring_write(ring, next_rptr);
2260 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* length in dwords plus the VM id (0 for kernel address space) */
2263 control |= ib->length_dw |
2264 (ib->vm ? (ib->vm->id << 24) : 0);
2266 radeon_ring_write(ring, header);
2267 radeon_ring_write(ring,
2271 (ib->gpu_addr & 0xFFFFFFFC));
2272 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2273 radeon_ring_write(ring, control);
2277 * cik_ib_test - basic gfx ring IB test
2279 * @rdev: radeon_device pointer
2280 * @ring: radeon_ring structure holding ring information
2282 * Allocate an IB and execute it on the gfx ring (CIK).
2283 * Provides a basic gfx ring test to verify that IBs are working.
2284 * Returns 0 on success, error on failure.
/* NOTE(review): fragmented extract — error-check branches, 'break',
 * closing braces and 'return r' lines are missing from this view.
 */
2286 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2288 struct radeon_ib ib;
/* grab a scratch register and seed it with a sentinel the IB must overwrite */
2294 r = radeon_scratch_get(rdev, &scratch);
2296 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2299 WREG32(scratch, 0xCAFEDEAD);
2300 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2302 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
/* three-dword IB: SET_UCONFIG_REG writes 0xDEADBEEF into the scratch reg */
2305 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2306 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2307 ib.ptr[2] = 0xDEADBEEF;
2309 r = radeon_ib_schedule(rdev, &ib, NULL);
2311 radeon_scratch_free(rdev, scratch);
2312 radeon_ib_free(rdev, &ib);
2313 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2316 r = radeon_fence_wait(ib.fence, false);
2318 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
/* poll the scratch reg until the sentinel shows up or we time out */
2321 for (i = 0; i < rdev->usec_timeout; i++) {
2322 tmp = RREG32(scratch);
2323 if (tmp == 0xDEADBEEF)
2327 if (i < rdev->usec_timeout) {
2328 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2330 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2334 radeon_scratch_free(rdev, scratch);
2335 radeon_ib_free(rdev, &ib);
2341 * On CIK, gfx and compute now have independent command processors.
2344 * Gfx consists of a single ring and can process both gfx jobs and
2345 * compute jobs. The gfx CP consists of three microengines (ME):
2346 * PFP - Pre-Fetch Parser
2348 * CE - Constant Engine
2349 * The PFP and ME make up what is considered the Drawing Engine (DE).
2350 * The CE is an asynchronous engine used for updating buffer descriptors
2351 * used by the DE so that they can be loaded into cache in parallel
2352 * while the DE is processing state update packets.
2355 * The compute CP consists of two microengines (ME):
2356 * MEC1 - Compute MicroEngine 1
2357 * MEC2 - Compute MicroEngine 2
2358 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2359 * The queues are exposed to userspace and are programmed directly
2360 * by the compute runtime.
2363 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2365 * @rdev: radeon_device pointer
2366 * @enable: enable or disable the MEs
2368 * Halts or unhalts the gfx MEs.
/* Unhalt (enable) or halt all three gfx micro-engines; halting also
 * marks the gfx ring as not ready.
 */
2370 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2373 WREG32(CP_ME_CNTL, 0);
2375 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2376 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2384 * @rdev: radeon_device pointer
2386 * Loads the gfx PFP, ME, and CE ucode.
2387 * Returns 0 for success, -EINVAL if the ucode is not available.
/* NOTE(review): fragmented extract — the 'return -EINVAL' / 'return 0'
 * lines are missing from this view. Firmware images are stored big-endian,
 * hence the be32_to_cpup() on each word.
 */
2389 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2391 const __be32 *fw_data;
2394 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
/* halt the MEs before touching their ucode RAM */
2397 cik_cp_gfx_enable(rdev, false);
/* PFP: reset write address, stream in the words, reset again */
2400 fw_data = (const __be32 *)rdev->pfp_fw->data;
2401 WREG32(CP_PFP_UCODE_ADDR, 0);
2402 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2403 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2404 WREG32(CP_PFP_UCODE_ADDR, 0);
/* CE */
2407 fw_data = (const __be32 *)rdev->ce_fw->data;
2408 WREG32(CP_CE_UCODE_ADDR, 0);
2409 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2410 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2411 WREG32(CP_CE_UCODE_ADDR, 0);
/* ME */
2414 fw_data = (const __be32 *)rdev->me_fw->data;
2415 WREG32(CP_ME_RAM_WADDR, 0);
2416 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2417 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2418 WREG32(CP_ME_RAM_WADDR, 0);
/* leave all ucode address registers zeroed */
2420 WREG32(CP_PFP_UCODE_ADDR, 0);
2421 WREG32(CP_CE_UCODE_ADDR, 0);
2422 WREG32(CP_ME_RAM_WADDR, 0);
2423 WREG32(CP_ME_RAM_RADDR, 0);
2428 * cik_cp_gfx_start - start the gfx ring
2430 * @rdev: radeon_device pointer
2432 * Enables the ring and loads the clear state context and other
2433 * packets required to init the ring.
2434 * Returns 0 for success, error for failure.
/* NOTE(review): fragmented extract — error return and 'return 0' lines are
 * missing from this view.
 */
2436 static int cik_cp_gfx_start(struct radeon_device *rdev)
2438 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* basic CP config: context count, endianness, device id */
2442 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2443 WREG32(CP_ENDIAN_SWAP, 0);
2444 WREG32(CP_DEVICE_ID, 1);
2446 cik_cp_gfx_enable(rdev, true);
/* room for the default state plus the fixed packets emitted below */
2448 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2450 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2454 /* init the CE partitions. CE only used for gfx on CIK */
2455 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2456 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2457 radeon_ring_write(ring, 0xc000);
2458 radeon_ring_write(ring, 0xc000);
2460 /* setup clear context state */
2461 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2462 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2464 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2465 radeon_ring_write(ring, 0x80000000);
2466 radeon_ring_write(ring, 0x80000000);
/* stream the golden default register state */
2468 for (i = 0; i < cik_default_size; i++)
2469 radeon_ring_write(ring, cik_default_state[i]);
2471 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2472 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2474 /* set clear context state */
2475 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2476 radeon_ring_write(ring, 0);
2478 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2479 radeon_ring_write(ring, 0x00000316);
2480 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2481 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2483 radeon_ring_unlock_commit(rdev, ring);
2489 * cik_cp_gfx_fini - stop the gfx ring
2491 * @rdev: radeon_device pointer
2493 * Stop the gfx ring and tear down the driver ring
/* Halt the gfx MEs, then tear down the driver-side gfx ring object. */
2496 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2498 cik_cp_gfx_enable(rdev, false);
2499 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2503 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2505 * @rdev: radeon_device pointer
2507 * Program the location and size of the gfx ring buffer
2508 * and test it to make sure it's working.
2509 * Returns 0 for success, error for failure.
/* NOTE(review): fragmented extract — local declarations, some WREG32 lines
 * and the final 'return' are missing from this view.
 */
2511 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2513 struct radeon_ring *ring;
2519 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2520 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2522 /* Set the write pointer delay */
2523 WREG32(CP_RB_WPTR_DELAY, 0);
2525 /* set the RB to use vmid 0 */
2526 WREG32(CP_RB_VMID, 0);
2528 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2530 /* ring 0 - compute and gfx */
2531 /* Set ring buffer size */
2532 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* CNTL encodes log2 ring size and log2 rptr-writeback block size */
2533 rb_bufsz = drm_order(ring->ring_size / 8);
2534 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2536 tmp |= BUF_SWAP_32BIT;
2538 WREG32(CP_RB0_CNTL, tmp);
2540 /* Initialize the ring buffer's read and write pointers */
2541 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2543 WREG32(CP_RB0_WPTR, ring->wptr);
2545 /* set the wb address whether it's enabled or not */
2546 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2547 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2549 /* scratch register shadowing is no longer supported */
2550 WREG32(SCRATCH_UMSK, 0);
2552 if (!rdev->wb.enabled)
2553 tmp |= RB_NO_UPDATE;
2556 WREG32(CP_RB0_CNTL, tmp);
/* ring base is a 256-byte-aligned GPU address (>> 8) */
2558 rb_addr = ring->gpu_addr >> 8;
2559 WREG32(CP_RB0_BASE, rb_addr);
2560 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2562 ring->rptr = RREG32(CP_RB0_RPTR);
2564 /* start the ring */
2565 cik_cp_gfx_start(rdev);
2566 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2567 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2569 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
/* Fetch a compute ring's read pointer: from the writeback page when
 * writeback is enabled, otherwise directly from the HQD register of the
 * selected me/pipe/queue (SRBM selection serialized by srbm_mutex).
 */
2575 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2576 struct radeon_ring *ring)
2582 if (rdev->wb.enabled) {
2583 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2585 spin_lock(&rdev->srbm_mutex);
2586 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2587 rptr = RREG32(CP_HQD_PQ_RPTR);
2588 cik_srbm_select(rdev, 0, 0, 0, 0);
2589 spin_unlock(&rdev->srbm_mutex);
2591 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
/* Fetch a compute ring's write pointer — same writeback-or-HQD-register
 * scheme as cik_compute_ring_get_rptr() above.
 */
2596 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2597 struct radeon_ring *ring)
2601 if (rdev->wb.enabled) {
2602 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2604 spin_lock(&rdev->srbm_mutex);
2605 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2606 wptr = RREG32(CP_HQD_PQ_WPTR);
2607 cik_srbm_select(rdev, 0, 0, 0, 0);
2608 spin_unlock(&rdev->srbm_mutex);
2610 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
/* Publish a compute ring's new write pointer: store it in the writeback
 * page, then kick the hardware through the queue's doorbell.
 */
2615 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2616 struct radeon_ring *ring)
2618 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2620 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2621 WDOORBELL32(ring->doorbell_offset, wptr);
2625 * cik_cp_compute_enable - enable/disable the compute CP MEs
2627 * @rdev: radeon_device pointer
2628 * @enable: enable or disable the MEs
2630 * Halts or unhalts the compute MEs.
/* Unhalt (enable) or halt both compute micro-engines via CP_MEC_CNTL. */
2632 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2635 WREG32(CP_MEC_CNTL, 0);
2637 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2642 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2644 * @rdev: radeon_device pointer
2646 * Loads the compute MEC1&2 ucode.
2647 * Returns 0 for success, -EINVAL if the ucode is not available.
/* NOTE(review): fragmented extract — the mec_fw presence check and return
 * statements are missing from this view. Firmware words are big-endian.
 */
2649 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2651 const __be32 *fw_data;
2657 cik_cp_compute_enable(rdev, false);
/* MEC1 ucode */
2660 fw_data = (const __be32 *)rdev->mec_fw->data;
2661 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2662 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2663 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2664 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
/* only KV has a second MEC; it reuses the same ucode image */
2666 if (rdev->family == CHIP_KAVERI) {
2668 fw_data = (const __be32 *)rdev->mec_fw->data;
2669 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2670 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2671 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2672 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2679 * cik_cp_compute_start - start the compute queues
2681 * @rdev: radeon_device pointer
2683 * Enable the compute queues.
2684 * Returns 0 for success, error for failure.
/* Enable the compute queues by unhalting the MECs. */
2686 static int cik_cp_compute_start(struct radeon_device *rdev)
2688 cik_cp_compute_enable(rdev, true);
2694 * cik_cp_compute_fini - stop the compute queues
2696 * @rdev: radeon_device pointer
2698 * Stop the compute queues and tear down the driver queue
/* Halt the compute MEs and free the MQD buffer objects of both compute
 * rings (CP1/CP2). NOTE(review): fragmented extract — loop braces and
 * the 'if (i == 0)' selector line are missing from this view.
 */
2701 static void cik_cp_compute_fini(struct radeon_device *rdev)
2705 cik_cp_compute_enable(rdev, false);
2707 for (i = 0; i < 2; i++) {
2709 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2711 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2713 if (rdev->ring[idx].mqd_obj) {
2714 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2715 if (unlikely(r != 0))
2716 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
/* unpin + unreserve, then drop the last reference */
2718 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2719 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2721 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2722 rdev->ring[idx].mqd_obj = NULL;
/* Free the HPD EOP buffer object allocated by cik_mec_init(). */
2727 static void cik_mec_fini(struct radeon_device *rdev)
2731 if (rdev->mec.hpd_eop_obj) {
2732 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2733 if (unlikely(r != 0))
2734 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2735 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2736 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2738 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2739 rdev->mec.hpd_eop_obj = NULL;
2743 #define MEC_HPD_SIZE 2048
/* Size the MEC topology for this chip and allocate/pin/clear the GTT
 * buffer that holds the per-pipe HPD EOP areas. NOTE(review): fragmented
 * extract — error-path cleanup (cik_mec_fini + return) lines are missing
 * from this view.
 */
2745 static int cik_mec_init(struct radeon_device *rdev)
2751 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2752 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2754 if (rdev->family == CHIP_KAVERI)
2755 rdev->mec.num_mec = 2;
2757 rdev->mec.num_mec = 1;
2758 rdev->mec.num_pipe = 4;
2759 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2761 if (rdev->mec.hpd_eop_obj == NULL) {
/* one MEC_HPD_SIZE*2 region per pipe, across all MECs */
2762 r = radeon_bo_create(rdev,
2763 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2765 RADEON_GEM_DOMAIN_GTT, NULL,
2766 &rdev->mec.hpd_eop_obj);
2768 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2773 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2774 if (unlikely(r != 0)) {
2778 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2779 &rdev->mec.hpd_eop_gpu_addr);
2781 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2785 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2787 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2792 /* clear memory. Not sure if this is required or not */
2793 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2795 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2796 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* CPU-side shadow of the per-queue HQD (hardware queue descriptor)
 * register block; field order mirrors the CP_HQD_* register layout.
 * NOTE(review): fragmented extract — several members (e.g. the pq_base
 * and rptr/wptr fields referenced by cik_cp_compute_resume) fall in gaps
 * of this view.
 */
2801 struct hqd_registers
2803 u32 cp_mqd_base_addr;
2804 u32 cp_mqd_base_addr_hi;
2807 u32 cp_hqd_persistent_state;
2808 u32 cp_hqd_pipe_priority;
2809 u32 cp_hqd_queue_priority;
2812 u32 cp_hqd_pq_base_hi;
2814 u32 cp_hqd_pq_rptr_report_addr;
2815 u32 cp_hqd_pq_rptr_report_addr_hi;
2816 u32 cp_hqd_pq_wptr_poll_addr;
2817 u32 cp_hqd_pq_wptr_poll_addr_hi;
2818 u32 cp_hqd_pq_doorbell_control;
2820 u32 cp_hqd_pq_control;
2821 u32 cp_hqd_ib_base_addr;
2822 u32 cp_hqd_ib_base_addr_hi;
2824 u32 cp_hqd_ib_control;
2825 u32 cp_hqd_iq_timer;
2827 u32 cp_hqd_dequeue_request;
2828 u32 cp_hqd_dma_offload;
2829 u32 cp_hqd_sema_cmd;
2830 u32 cp_hqd_msg_type;
2831 u32 cp_hqd_atomic0_preop_lo;
2832 u32 cp_hqd_atomic0_preop_hi;
2833 u32 cp_hqd_atomic1_preop_lo;
2834 u32 cp_hqd_atomic1_preop_hi;
2835 u32 cp_hqd_hq_scheduler0;
2836 u32 cp_hqd_hq_scheduler1;
/* NOTE(review): interior of the MQD (memory queue descriptor) struct used
 * by cik_cp_compute_resume (struct bonaire_mqd); its opening line and the
 * 'header' member fall in a gap of this extract.
 */
2843 u32 dispatch_initiator;
2847 u32 pipeline_stat_enable;
2848 u32 perf_counter_enable;
2854 u32 resource_limits;
/* per-compute-unit thread masks, set to all-ones at init */
2855 u32 static_thread_mgmt01[2];
2857 u32 static_thread_mgmt23[2];
2859 u32 thread_trace_enable;
2862 u32 vgtcs_invoke_count[2];
/* shadow of the HQD register block for this queue */
2863 struct hqd_registers queue_state;
2865 u32 interrupt_queue[64];
2869 * cik_cp_compute_resume - setup the compute queue registers
2871 * @rdev: radeon_device pointer
2873 * Program the compute queues and test them to make sure they
2875 * Returns 0 for success, error for failure.
/* Program and activate both compute queues (CP1/CP2): allocate and fill
 * an MQD per queue, mirror it into the HQD registers of the selected
 * me/pipe/queue, hook up doorbells/writeback, then ring-test each queue.
 * NOTE(review): fragmented extract — several locals, error returns, loop
 * braces and some register lines are missing from this view.
 */
2877 static int cik_cp_compute_resume(struct radeon_device *rdev)
2881 bool use_doorbell = true;
2887 struct bonaire_mqd *mqd;
2889 r = cik_cp_compute_start(rdev);
2893 /* fix up chicken bits */
2894 tmp = RREG32(CP_CPF_DEBUG);
2896 WREG32(CP_CPF_DEBUG, tmp);
2898 /* init the pipes */
2899 spin_lock(&rdev->srbm_mutex);
2900 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
/* pipes 0-3 live on MEC1, 4-7 on MEC2 (KV only) */
2901 int me = (i < 4) ? 1 : 2;
2902 int pipe = (i < 4) ? i : (i - 4);
2904 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2906 cik_srbm_select(rdev, me, pipe, 0, 0);
2908 /* write the EOP addr */
2909 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2912 /* set the VMID assigned */
2913 WREG32(CP_HPD_EOP_VMID, 0);
2915 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917 tmp &= ~EOP_SIZE_MASK;
2918 tmp |= drm_order(MEC_HPD_SIZE / 8);
2919 WREG32(CP_HPD_EOP_CONTROL, tmp);
2921 cik_srbm_select(rdev, 0, 0, 0, 0);
2922 spin_unlock(&rdev->srbm_mutex);
2924 /* init the queues. Just two for now. */
2925 for (i = 0; i < 2; i++) {
2927 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2929 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2931 if (rdev->ring[idx].mqd_obj == NULL) {
2932 r = radeon_bo_create(rdev,
2933 sizeof(struct bonaire_mqd),
2935 RADEON_GEM_DOMAIN_GTT, NULL,
2936 &rdev->ring[idx].mqd_obj);
2938 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2943 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2944 if (unlikely(r != 0)) {
2945 cik_cp_compute_fini(rdev);
2948 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2951 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2952 cik_cp_compute_fini(rdev);
2955 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2957 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2958 cik_cp_compute_fini(rdev);
2962 /* doorbell offset */
2963 rdev->ring[idx].doorbell_offset =
2964 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2966 /* init the mqd struct */
2967 memset(buf, 0, sizeof(struct bonaire_mqd));
2969 mqd = (struct bonaire_mqd *)buf;
2970 mqd->header = 0xC0310800;
/* enable all CUs for this queue */
2971 mqd->static_thread_mgmt01[0] = 0xffffffff;
2972 mqd->static_thread_mgmt01[1] = 0xffffffff;
2973 mqd->static_thread_mgmt23[0] = 0xffffffff;
2974 mqd->static_thread_mgmt23[1] = 0xffffffff;
2976 spin_lock(&rdev->srbm_mutex);
2977 cik_srbm_select(rdev, rdev->ring[idx].me,
2978 rdev->ring[idx].pipe,
2979 rdev->ring[idx].queue, 0);
2981 /* disable wptr polling */
2982 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2983 tmp &= ~WPTR_POLL_EN;
2984 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2986 /* enable doorbell? */
2987 mqd->queue_state.cp_hqd_pq_doorbell_control =
2988 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2990 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2992 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2993 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2994 mqd->queue_state.cp_hqd_pq_doorbell_control);
2996 /* disable the queue if it's active */
2997 mqd->queue_state.cp_hqd_dequeue_request = 0;
2998 mqd->queue_state.cp_hqd_pq_rptr = 0;
2999 mqd->queue_state.cp_hqd_pq_wptr= 0;
3000 if (RREG32(CP_HQD_ACTIVE) & 1) {
3001 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* wait for the HQD to report inactive before reprogramming it */
3002 for (i = 0; i < rdev->usec_timeout; i++) {
3003 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3007 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3008 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3009 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3012 /* set the pointer to the MQD */
3013 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3014 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3015 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3016 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3017 /* set MQD vmid to 0 */
3018 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3019 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3020 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3022 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3023 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3024 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3025 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3026 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3027 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3029 /* set up the HQD, this is similar to CP_RB0_CNTL */
3030 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3031 mqd->queue_state.cp_hqd_pq_control &=
3032 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3034 mqd->queue_state.cp_hqd_pq_control |=
3035 drm_order(rdev->ring[idx].ring_size / 8);
3036 mqd->queue_state.cp_hqd_pq_control |=
3037 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3039 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3041 mqd->queue_state.cp_hqd_pq_control &=
3042 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3043 mqd->queue_state.cp_hqd_pq_control |=
3044 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3045 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3047 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3049 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3051 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3052 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3053 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3054 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3055 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3056 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3058 /* set the wb address whether it's enabled or not */
3060 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3062 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3063 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3064 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3065 upper_32_bits(wb_gpu_addr) & 0xffff;
3066 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3067 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3068 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3069 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3071 /* enable the doorbell if requested */
3073 mqd->queue_state.cp_hqd_pq_doorbell_control =
3074 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3075 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3076 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3077 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3078 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3079 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3080 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3083 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3085 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3086 mqd->queue_state.cp_hqd_pq_doorbell_control);
3088 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3089 rdev->ring[idx].wptr = 0;
3090 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3091 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3092 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3093 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3095 /* set the vmid for the queue */
3096 mqd->queue_state.cp_hqd_vmid = 0;
3097 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3099 /* activate the queue */
3100 mqd->queue_state.cp_hqd_active = 1;
3101 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3103 cik_srbm_select(rdev, 0, 0, 0, 0);
3104 spin_unlock(&rdev->srbm_mutex);
3106 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3107 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3109 rdev->ring[idx].ready = true;
3110 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3112 rdev->ring[idx].ready = false;
/* Enable/disable both the gfx and compute command processors together. */
3118 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3120 cik_cp_gfx_enable(rdev, enable);
3121 cik_cp_compute_enable(rdev, enable);
/* Load gfx then compute CP ucode. NOTE(review): fragmented extract —
 * the error-propagation and return lines are missing from this view.
 */
3124 static int cik_cp_load_microcode(struct radeon_device *rdev)
3128 r = cik_cp_gfx_load_microcode(rdev);
3131 r = cik_cp_compute_load_microcode(rdev);
/* Tear down both the gfx and compute command processors. */
3138 static void cik_cp_fini(struct radeon_device *rdev)
3140 cik_cp_gfx_fini(rdev);
3141 cik_cp_compute_fini(rdev);
/* Soft-reset the CP, reload its microcode and bring up the gfx and
 * compute rings. NOTE(review): fragmented extract — the delay between
 * reset assertion/deassertion and the error/return lines are missing
 * from this view.
 */
3144 static int cik_cp_resume(struct radeon_device *rdev)
3148 /* Reset all cp blocks */
3149 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3150 RREG32(GRBM_SOFT_RESET);
3152 WREG32(GRBM_SOFT_RESET, 0);
3153 RREG32(GRBM_SOFT_RESET);
3155 r = cik_cp_load_microcode(rdev);
3159 r = cik_cp_gfx_resume(rdev);
3162 r = cik_cp_compute_resume(rdev);
3171 * Starting with CIK, the GPU has new asynchronous
3172 * DMA engines. These engines are used for compute
3173 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3174 * and each one supports 1 ring buffer used for gfx
3175 * and 2 queues used for compute.
3177 * The programming model is very similar to the CP
3178 * (ring buffer, IBs, etc.), but sDMA has it's own
3179 * packet format that is different from the PM4 format
3180 * used by the CP. sDMA supports copying data, writing
3181 * embedded data, solid fills, and a number of other
3182 * things. It also has support for tiling/detiling of
3186 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3188 * @rdev: radeon_device pointer
3189 * @ib: IB object to schedule
3191 * Schedule an IB in the DMA ring (CIK).
/* NOTE(review): fragmented extract — the NOP-pad write inside the
 * while loop at 3201 is missing from this view.
 */
3193 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3194 struct radeon_ib *ib)
3196 struct radeon_ring *ring = &rdev->ring[ib->ring];
/* VM id rides in the low bits of the packet's extra field */
3197 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3199 if (rdev->wb.enabled) {
3200 u32 next_rptr = ring->wptr + 5;
/* align so the 5-dword write packet ends on an 8-dword boundary */
3201 while ((next_rptr & 7) != 4)
3204 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3205 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3206 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3207 radeon_ring_write(ring, 1); /* number of DWs to follow */
3208 radeon_ring_write(ring, next_rptr);
3211 /* IB packet must end on a 8 DW boundary */
3212 while ((ring->wptr & 7) != 4)
3213 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3214 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3215 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3216 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3217 radeon_ring_write(ring, ib->length_dw);
3222 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3224 * @rdev: radeon_device pointer
3225 * @fence: radeon fence object
3227 * Add a DMA fence packet to the ring to write
3228 * the fence seq number and DMA trap packet to generate
3229 * an interrupt if needed (CIK).
/* Write the fence value, raise a trap interrupt, then poll-wait for the
 * HDP flush using the engine-specific ref/mask bits.
 */
3231 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3232 struct radeon_fence *fence)
3234 struct radeon_ring *ring = &rdev->ring[fence->ring];
3235 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
/* POLL_REG_MEM extra bits: op=1 (poll), func=3 (equal) */
3236 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3237 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3240 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3241 ref_and_mask = SDMA0;
3243 ref_and_mask = SDMA1;
3245 /* write the fence */
3246 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3247 radeon_ring_write(ring, addr & 0xffffffff);
3248 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3249 radeon_ring_write(ring, fence->seq);
3250 /* generate an interrupt */
3251 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
/* flush HDP: poll GPU_HDP_FLUSH_DONE until the engine's bit matches */
3253 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3254 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3255 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3256 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3257 radeon_ring_write(ring, ref_and_mask); /* MASK */
3258 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3262 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3264 * @rdev: radeon_device pointer
3265 * @ring: radeon_ring structure holding ring information
3266 * @semaphore: radeon semaphore object
3267 * @emit_wait: wait or signal semaphore
3269 * Add a DMA semaphore packet to the ring wait on or signal
3270 * other rings (CIK).
/* Emit an sDMA semaphore packet; the S bit in the extra field selects
 * signal (set) vs wait (clear when emit_wait). Address must be 8-byte
 * aligned.
 */
3272 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3273 struct radeon_ring *ring,
3274 struct radeon_semaphore *semaphore,
3277 u64 addr = semaphore->gpu_addr;
3278 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3280 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3281 radeon_ring_write(ring, addr & 0xfffffff8);
3282 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3286 * cik_sdma_gfx_stop - stop the gfx async dma engines
3288 * @rdev: radeon_device pointer
3290 * Stop the gfx async dma ring buffers (CIK).
/* Disable the gfx ring buffer and IB fetch on both sDMA engines and
 * shrink the TTM-visible VRAM window back to its default.
 */
3292 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3294 u32 rb_cntl, reg_offset;
3297 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3299 for (i = 0; i < 2; i++) {
3301 reg_offset = SDMA0_REGISTER_OFFSET;
3303 reg_offset = SDMA1_REGISTER_OFFSET;
3304 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3305 rb_cntl &= ~SDMA_RB_ENABLE;
3306 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3307 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3312 * cik_sdma_rlc_stop - stop the compute async dma engines
3314 * @rdev: radeon_device pointer
3316 * Stop the compute async dma queues (CIK).
/* Stop the compute (RLC) sDMA queues. NOTE(review): the body is not
 * visible in this extract — presumably a stub/todo; confirm against the
 * full file.
 */
3318 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3324 * cik_sdma_enable - stop the async dma engines
3326 * @rdev: radeon_device pointer
3327 * @enable: enable/disable the DMA MEs.
3329 * Halt or unhalt the async dma engines (CIK).
/* Set or clear the HALT bit in each sDMA engine's ME control register.
 * NOTE(review): fragmented extract — the 'if (enable)' selector line is
 * missing from this view.
 */
3331 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3333 u32 me_cntl, reg_offset;
3336 for (i = 0; i < 2; i++) {
3338 reg_offset = SDMA0_REGISTER_OFFSET;
3340 reg_offset = SDMA1_REGISTER_OFFSET;
3341 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3343 me_cntl &= ~SDMA_HALT;
3345 me_cntl |= SDMA_HALT;
3346 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3351 * cik_sdma_gfx_resume - setup and start the async dma engines
3353 * @rdev: radeon_device pointer
3355 * Set up the gfx DMA ring buffers and enable them (CIK).
3356 * Returns 0 for success, error for failure.
3358 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3360 struct radeon_ring *ring;
3361 u32 rb_cntl, ib_cntl;
3363 u32 reg_offset, wb_offset;
3366 for (i = 0; i < 2; i++) {
3368 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3369 reg_offset = SDMA0_REGISTER_OFFSET;
3370 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3372 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3373 reg_offset = SDMA1_REGISTER_OFFSET;
3374 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3377 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3378 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3380 /* Set ring buffer size in dwords */
3381 rb_bufsz = drm_order(ring->ring_size / 4);
3382 rb_cntl = rb_bufsz << 1;
3384 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3386 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3388 /* Initialize the ring buffer's read and write pointers */
3389 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3390 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3392 /* set the wb address whether it's enabled or not */
3393 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3394 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3395 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3396 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3398 if (rdev->wb.enabled)
3399 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3401 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3402 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3405 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3407 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3410 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3412 ib_cntl = SDMA_IB_ENABLE;
3414 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3416 /* enable DMA IBs */
3417 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3421 r = radeon_ring_test(rdev, ring->idx, ring);
3423 ring->ready = false;
3428 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3434 * cik_sdma_rlc_resume - setup and start the async dma engines
3436 * @rdev: radeon_device pointer
3438 * Set up the compute DMA queues and enable them (CIK).
3439 * Returns 0 for success, error for failure.
3441 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3448 * cik_sdma_load_microcode - load the sDMA ME ucode
3450 * @rdev: radeon_device pointer
3452 * Loads the sDMA0/1 ucode.
3453 * Returns 0 for success, -EINVAL if the ucode is not available.
3455 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3457 const __be32 *fw_data;
3463 /* stop the gfx rings and rlc compute queues */
3464 cik_sdma_gfx_stop(rdev);
3465 cik_sdma_rlc_stop(rdev);
3468 cik_sdma_enable(rdev, false);
3471 fw_data = (const __be32 *)rdev->sdma_fw->data;
3472 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3473 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3474 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3475 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3478 fw_data = (const __be32 *)rdev->sdma_fw->data;
3479 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3480 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3481 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3482 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3484 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3485 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3490 * cik_sdma_resume - setup and start the async dma engines
3492 * @rdev: radeon_device pointer
3494 * Set up the DMA engines and enable them (CIK).
3495 * Returns 0 for success, error for failure.
3497 static __unused int cik_sdma_resume(struct radeon_device *rdev)
3502 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3503 RREG32(SRBM_SOFT_RESET);
3505 WREG32(SRBM_SOFT_RESET, 0);
3506 RREG32(SRBM_SOFT_RESET);
3508 r = cik_sdma_load_microcode(rdev);
3512 /* unhalt the MEs */
3513 cik_sdma_enable(rdev, true);
3515 /* start the gfx rings and rlc compute queues */
3516 r = cik_sdma_gfx_resume(rdev);
3519 r = cik_sdma_rlc_resume(rdev);
3527 * cik_sdma_fini - tear down the async dma engines
3529 * @rdev: radeon_device pointer
3531 * Stop the async dma engines and free the rings (CIK).
3533 static __unused void cik_sdma_fini(struct radeon_device *rdev)
3535 /* stop the gfx rings and rlc compute queues */
3536 cik_sdma_gfx_stop(rdev);
3537 cik_sdma_rlc_stop(rdev);
3539 cik_sdma_enable(rdev, false);
3540 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3541 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3542 /* XXX - compute dma queue tear down */
3546 * cik_copy_dma - copy pages using the DMA engine
3548 * @rdev: radeon_device pointer
3549 * @src_offset: src GPU address
3550 * @dst_offset: dst GPU address
3551 * @num_gpu_pages: number of GPU pages to xfer
3552 * @fence: radeon fence object
3554 * Copy GPU paging using the DMA engine (CIK).
3555 * Used by the radeon ttm implementation to move pages if
3556 * registered as the asic copy callback.
3558 int cik_copy_dma(struct radeon_device *rdev,
3559 uint64_t src_offset, uint64_t dst_offset,
3560 unsigned num_gpu_pages,
3561 struct radeon_fence **fence)
3563 struct radeon_semaphore *sem = NULL;
3564 int ring_index = rdev->asic->copy.dma_ring_index;
3565 struct radeon_ring *ring = &rdev->ring[ring_index];
3566 u32 size_in_bytes, cur_size_in_bytes;
3570 r = radeon_semaphore_create(rdev, &sem);
3572 DRM_ERROR("radeon: moving bo (%d).\n", r);
3576 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3577 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3578 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3580 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581 radeon_semaphore_free(rdev, &sem, NULL);
3585 if (radeon_fence_need_sync(*fence, ring->idx)) {
3586 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3588 radeon_fence_note_sync(*fence, ring->idx);
3590 radeon_semaphore_free(rdev, &sem, NULL);
3593 for (i = 0; i < num_loops; i++) {
3594 cur_size_in_bytes = size_in_bytes;
3595 if (cur_size_in_bytes > 0x1fffff)
3596 cur_size_in_bytes = 0x1fffff;
3597 size_in_bytes -= cur_size_in_bytes;
3598 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3599 radeon_ring_write(ring, cur_size_in_bytes);
3600 radeon_ring_write(ring, 0); /* src/dst endian swap */
3601 radeon_ring_write(ring, src_offset & 0xffffffff);
3602 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3603 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3604 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3605 src_offset += cur_size_in_bytes;
3606 dst_offset += cur_size_in_bytes;
3609 r = radeon_fence_emit(rdev, fence, ring->idx);
3611 radeon_ring_unlock_undo(rdev, ring);
3615 radeon_ring_unlock_commit(rdev, ring);
3616 radeon_semaphore_free(rdev, &sem, *fence);
3622 * cik_sdma_ring_test - simple async dma engine test
3624 * @rdev: radeon_device pointer
3625 * @ring: radeon_ring structure holding ring information
3627 * Test the DMA engine by writing using it to write an
3628 * value to memory. (CIK).
3629 * Returns 0 for success, error for failure.
3631 int cik_sdma_ring_test(struct radeon_device *rdev,
3632 struct radeon_ring *ring)
3636 volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3640 DRM_ERROR("invalid vram scratch pointer\n");
3647 r = radeon_ring_lock(rdev, ring, 4);
3649 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3652 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3653 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3654 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3655 radeon_ring_write(ring, 1); /* number of DWs to follow */
3656 radeon_ring_write(ring, 0xDEADBEEF);
3657 radeon_ring_unlock_commit(rdev, ring);
3659 for (i = 0; i < rdev->usec_timeout; i++) {
3661 if (tmp == 0xDEADBEEF)
3666 if (i < rdev->usec_timeout) {
3667 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3669 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3677 * cik_sdma_ib_test - test an IB on the DMA engine
3679 * @rdev: radeon_device pointer
3680 * @ring: radeon_ring structure holding ring information
3682 * Test a simple IB in the DMA ring (CIK).
3683 * Returns 0 on success, error on failure.
3685 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3687 struct radeon_ib ib;
3690 volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3694 DRM_ERROR("invalid vram scratch pointer\n");
3701 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3703 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3707 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3708 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3709 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3711 ib.ptr[4] = 0xDEADBEEF;
3714 r = radeon_ib_schedule(rdev, &ib, NULL);
3716 radeon_ib_free(rdev, &ib);
3717 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3720 r = radeon_fence_wait(ib.fence, false);
3722 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3725 for (i = 0; i < rdev->usec_timeout; i++) {
3727 if (tmp == 0xDEADBEEF)
3731 if (i < rdev->usec_timeout) {
3732 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3734 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3737 radeon_ib_free(rdev, &ib);
3742 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3744 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3745 RREG32(GRBM_STATUS));
3746 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3747 RREG32(GRBM_STATUS2));
3748 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3749 RREG32(GRBM_STATUS_SE0));
3750 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3751 RREG32(GRBM_STATUS_SE1));
3752 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3753 RREG32(GRBM_STATUS_SE2));
3754 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3755 RREG32(GRBM_STATUS_SE3));
3756 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3757 RREG32(SRBM_STATUS));
3758 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3759 RREG32(SRBM_STATUS2));
3760 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3761 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3762 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3763 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3764 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3765 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3766 RREG32(CP_STALLED_STAT1));
3767 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3768 RREG32(CP_STALLED_STAT2));
3769 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3770 RREG32(CP_STALLED_STAT3));
3771 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3772 RREG32(CP_CPF_BUSY_STAT));
3773 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3774 RREG32(CP_CPF_STALLED_STAT1));
3775 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3776 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3777 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3778 RREG32(CP_CPC_STALLED_STAT1));
3779 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3783 * cik_gpu_check_soft_reset - check which blocks are busy
3785 * @rdev: radeon_device pointer
3787 * Check which blocks are busy and return the relevant reset
3788 * mask to be used by cik_gpu_soft_reset().
3789 * Returns a mask of the blocks to be reset.
3791 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3797 tmp = RREG32(GRBM_STATUS);
3798 if (tmp & (PA_BUSY | SC_BUSY |
3799 BCI_BUSY | SX_BUSY |
3800 TA_BUSY | VGT_BUSY |
3802 GDS_BUSY | SPI_BUSY |
3803 IA_BUSY | IA_BUSY_NO_DMA))
3804 reset_mask |= RADEON_RESET_GFX;
3806 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3807 reset_mask |= RADEON_RESET_CP;
3810 tmp = RREG32(GRBM_STATUS2);
3812 reset_mask |= RADEON_RESET_RLC;
3814 /* SDMA0_STATUS_REG */
3815 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3816 if (!(tmp & SDMA_IDLE))
3817 reset_mask |= RADEON_RESET_DMA;
3819 /* SDMA1_STATUS_REG */
3820 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3821 if (!(tmp & SDMA_IDLE))
3822 reset_mask |= RADEON_RESET_DMA1;
3825 tmp = RREG32(SRBM_STATUS2);
3826 if (tmp & SDMA_BUSY)
3827 reset_mask |= RADEON_RESET_DMA;
3829 if (tmp & SDMA1_BUSY)
3830 reset_mask |= RADEON_RESET_DMA1;
3833 tmp = RREG32(SRBM_STATUS);
3836 reset_mask |= RADEON_RESET_IH;
3839 reset_mask |= RADEON_RESET_SEM;
3841 if (tmp & GRBM_RQ_PENDING)
3842 reset_mask |= RADEON_RESET_GRBM;
3845 reset_mask |= RADEON_RESET_VMC;
3847 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3848 MCC_BUSY | MCD_BUSY))
3849 reset_mask |= RADEON_RESET_MC;
3851 if (evergreen_is_display_hung(rdev))
3852 reset_mask |= RADEON_RESET_DISPLAY;
3854 /* Skip MC reset as it's mostly likely not hung, just busy */
3855 if (reset_mask & RADEON_RESET_MC) {
3856 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3857 reset_mask &= ~RADEON_RESET_MC;
3864 * cik_gpu_soft_reset - soft reset GPU
3866 * @rdev: radeon_device pointer
3867 * @reset_mask: mask of which blocks to reset
3869 * Soft reset the blocks specified in @reset_mask.
3871 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3873 struct evergreen_mc_save save;
3874 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3877 if (reset_mask == 0)
3880 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3882 cik_print_gpu_status_regs(rdev);
3883 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3884 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3885 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3886 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3891 /* Disable GFX parsing/prefetching */
3892 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3894 /* Disable MEC parsing/prefetching */
3895 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3897 if (reset_mask & RADEON_RESET_DMA) {
3899 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3901 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3903 if (reset_mask & RADEON_RESET_DMA1) {
3905 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3907 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3910 evergreen_mc_stop(rdev, &save);
3911 if (evergreen_mc_wait_for_idle(rdev)) {
3912 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3915 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3916 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3918 if (reset_mask & RADEON_RESET_CP) {
3919 grbm_soft_reset |= SOFT_RESET_CP;
3921 srbm_soft_reset |= SOFT_RESET_GRBM;
3924 if (reset_mask & RADEON_RESET_DMA)
3925 srbm_soft_reset |= SOFT_RESET_SDMA;
3927 if (reset_mask & RADEON_RESET_DMA1)
3928 srbm_soft_reset |= SOFT_RESET_SDMA1;
3930 if (reset_mask & RADEON_RESET_DISPLAY)
3931 srbm_soft_reset |= SOFT_RESET_DC;
3933 if (reset_mask & RADEON_RESET_RLC)
3934 grbm_soft_reset |= SOFT_RESET_RLC;
3936 if (reset_mask & RADEON_RESET_SEM)
3937 srbm_soft_reset |= SOFT_RESET_SEM;
3939 if (reset_mask & RADEON_RESET_IH)
3940 srbm_soft_reset |= SOFT_RESET_IH;
3942 if (reset_mask & RADEON_RESET_GRBM)
3943 srbm_soft_reset |= SOFT_RESET_GRBM;
3945 if (reset_mask & RADEON_RESET_VMC)
3946 srbm_soft_reset |= SOFT_RESET_VMC;
3948 if (!(rdev->flags & RADEON_IS_IGP)) {
3949 if (reset_mask & RADEON_RESET_MC)
3950 srbm_soft_reset |= SOFT_RESET_MC;
3953 if (grbm_soft_reset) {
3954 tmp = RREG32(GRBM_SOFT_RESET);
3955 tmp |= grbm_soft_reset;
3956 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3957 WREG32(GRBM_SOFT_RESET, tmp);
3958 tmp = RREG32(GRBM_SOFT_RESET);
3962 tmp &= ~grbm_soft_reset;
3963 WREG32(GRBM_SOFT_RESET, tmp);
3964 tmp = RREG32(GRBM_SOFT_RESET);
3967 if (srbm_soft_reset) {
3968 tmp = RREG32(SRBM_SOFT_RESET);
3969 tmp |= srbm_soft_reset;
3970 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3971 WREG32(SRBM_SOFT_RESET, tmp);
3972 tmp = RREG32(SRBM_SOFT_RESET);
3976 tmp &= ~srbm_soft_reset;
3977 WREG32(SRBM_SOFT_RESET, tmp);
3978 tmp = RREG32(SRBM_SOFT_RESET);
3981 /* Wait a little for things to settle down */
3984 evergreen_mc_resume(rdev, &save);
3987 cik_print_gpu_status_regs(rdev);
3991 * cik_asic_reset - soft reset GPU
3993 * @rdev: radeon_device pointer
3995 * Look up which blocks are hung and attempt
3997 * Returns 0 for success.
3999 int cik_asic_reset(struct radeon_device *rdev)
4003 reset_mask = cik_gpu_check_soft_reset(rdev);
4006 r600_set_bios_scratch_engine_hung(rdev, true);
4008 cik_gpu_soft_reset(rdev, reset_mask);
4010 reset_mask = cik_gpu_check_soft_reset(rdev);
4013 r600_set_bios_scratch_engine_hung(rdev, false);
4019 * cik_gfx_is_lockup - check if the 3D engine is locked up
4021 * @rdev: radeon_device pointer
4022 * @ring: radeon_ring structure holding ring information
4024 * Check if the 3D engine is locked up (CIK).
4025 * Returns true if the engine is locked, false if not.
4027 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4029 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4031 if (!(reset_mask & (RADEON_RESET_GFX |
4032 RADEON_RESET_COMPUTE |
4033 RADEON_RESET_CP))) {
4034 radeon_ring_lockup_update(ring);
4037 /* force CP activities */
4038 radeon_ring_force_activity(rdev, ring);
4039 return radeon_ring_test_lockup(rdev, ring);
4043 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4045 * @rdev: radeon_device pointer
4046 * @ring: radeon_ring structure holding ring information
4048 * Check if the async DMA engine is locked up (CIK).
4049 * Returns true if the engine appears to be locked up, false if not.
4051 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4053 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4056 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4057 mask = RADEON_RESET_DMA;
4059 mask = RADEON_RESET_DMA1;
4061 if (!(reset_mask & mask)) {
4062 radeon_ring_lockup_update(ring);
4065 /* force ring activities */
4066 radeon_ring_force_activity(rdev, ring);
4067 return radeon_ring_test_lockup(rdev, ring);
4072 * cik_mc_program - program the GPU memory controller
4074 * @rdev: radeon_device pointer
4076 * Set the location of vram, gart, and AGP in the GPU's
4077 * physical address space (CIK).
4079 static __unused void cik_mc_program(struct radeon_device *rdev)
4081 struct evergreen_mc_save save;
4085 /* Initialize HDP */
4086 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4087 WREG32((0x2c14 + j), 0x00000000);
4088 WREG32((0x2c18 + j), 0x00000000);
4089 WREG32((0x2c1c + j), 0x00000000);
4090 WREG32((0x2c20 + j), 0x00000000);
4091 WREG32((0x2c24 + j), 0x00000000);
4093 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4095 evergreen_mc_stop(rdev, &save);
4096 if (radeon_mc_wait_for_idle(rdev)) {
4097 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4099 /* Lockout access through VGA aperture*/
4100 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4101 /* Update configuration */
4102 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4103 rdev->mc.vram_start >> 12);
4104 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4105 rdev->mc.vram_end >> 12);
4106 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4107 rdev->vram_scratch.gpu_addr >> 12);
4108 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4109 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4110 WREG32(MC_VM_FB_LOCATION, tmp);
4111 /* XXX double check these! */
4112 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4113 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4114 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4115 WREG32(MC_VM_AGP_BASE, 0);
4116 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4117 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4118 if (radeon_mc_wait_for_idle(rdev)) {
4119 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4121 evergreen_mc_resume(rdev, &save);
4122 /* we need to own VRAM, so turn off the VGA renderer here
4123 * to stop it overwriting our objects */
4124 rv515_vga_render_disable(rdev);
4128 * cik_mc_init - initialize the memory controller driver params
4130 * @rdev: radeon_device pointer
4132 * Look up the amount of vram, vram width, and decide how to place
4133 * vram and gart within the GPU's physical address space (CIK).
4134 * Returns 0 for success.
4136 static __unused int cik_mc_init(struct radeon_device *rdev)
4139 int chansize, numchan;
4141 /* Get VRAM informations */
4142 rdev->mc.vram_is_ddr = true;
4143 tmp = RREG32(MC_ARB_RAMCFG);
4144 if (tmp & CHANSIZE_MASK) {
4149 tmp = RREG32(MC_SHARED_CHMAP);
4150 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4180 rdev->mc.vram_width = numchan * chansize;
4181 /* Could aper size report 0 ? */
4182 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4183 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4184 /* size in MB on si */
4185 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4186 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4187 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4188 si_vram_gtt_location(rdev, &rdev->mc);
4189 radeon_update_bandwidth_info(rdev);
4196 * VMID 0 is the physical GPU addresses as used by the kernel.
4197 * VMIDs 1-15 are used for userspace clients and are handled
4198 * by the radeon vm/hsa code.
4201 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4203 * @rdev: radeon_device pointer
4205 * Flush the TLB for the VMID 0 page table (CIK).
4207 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4209 /* flush hdp cache */
4210 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4212 /* bits 0-15 are the VM contexts0-15 */
4213 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4217 * cik_pcie_gart_enable - gart enable
4219 * @rdev: radeon_device pointer
4221 * This sets up the TLBs, programs the page tables for VMID0,
4222 * sets up the hw for VMIDs 1-15 which are allocated on
4223 * demand, and sets up the global locations for the LDS, GDS,
4224 * and GPUVM for FSA64 clients (CIK).
4225 * Returns 0 for success, errors for failure.
4227 static __unused int cik_pcie_gart_enable(struct radeon_device *rdev)
4231 if (rdev->gart.robj == NULL) {
4232 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4235 r = radeon_gart_table_vram_pin(rdev);
4238 radeon_gart_restore(rdev);
4239 /* Setup TLB control */
4240 WREG32(MC_VM_MX_L1_TLB_CNTL,
4243 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4244 ENABLE_ADVANCED_DRIVER_MODEL |
4245 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4246 /* Setup L2 cache */
4247 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4248 ENABLE_L2_FRAGMENT_PROCESSING |
4249 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4250 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4251 EFFECTIVE_L2_QUEUE_SIZE(7) |
4252 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4253 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4254 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4255 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4256 /* setup context0 */
4257 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4258 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4259 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4260 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4261 (u32)(rdev->dummy_page.addr >> 12));
4262 WREG32(VM_CONTEXT0_CNTL2, 0);
4263 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4264 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4270 /* empty context1-15 */
4271 /* FIXME start with 4G, once using 2 level pt switch to full
4274 /* set vm size, must be a multiple of 4 */
4275 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4276 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4277 for (i = 1; i < 16; i++) {
4279 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4280 rdev->gart.table_addr >> 12);
4282 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4283 rdev->gart.table_addr >> 12);
4286 /* enable context1-15 */
4287 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4288 (u32)(rdev->dummy_page.addr >> 12));
4289 WREG32(VM_CONTEXT1_CNTL2, 4);
4290 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4291 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4292 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4293 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4294 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4295 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4296 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4297 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4298 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4299 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4301 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4304 /* TC cache setup ??? */
4305 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4306 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4307 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4309 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4310 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4311 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4312 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4313 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4315 WREG32(TC_CFG_L1_VOLATILE, 0);
4316 WREG32(TC_CFG_L2_VOLATILE, 0);
4318 if (rdev->family == CHIP_KAVERI) {
4319 u32 tmp = RREG32(CHUB_CONTROL);
4321 WREG32(CHUB_CONTROL, tmp);
4324 /* XXX SH_MEM regs */
4325 /* where to put LDS, scratch, GPUVM in FSA64 space */
4326 spin_lock(&rdev->srbm_mutex);
4327 for (i = 0; i < 16; i++) {
4328 cik_srbm_select(rdev, 0, 0, 0, i);
4329 /* CP and shaders */
4330 WREG32(SH_MEM_CONFIG, 0);
4331 WREG32(SH_MEM_APE1_BASE, 1);
4332 WREG32(SH_MEM_APE1_LIMIT, 0);
4333 WREG32(SH_MEM_BASES, 0);
4335 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4336 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4337 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4338 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4339 /* XXX SDMA RLC - todo */
4341 cik_srbm_select(rdev, 0, 0, 0, 0);
4342 spin_unlock(&rdev->srbm_mutex);
4344 cik_pcie_gart_tlb_flush(rdev);
4345 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4346 (unsigned)(rdev->mc.gtt_size >> 20),
4347 (unsigned long long)rdev->gart.table_addr);
4348 rdev->gart.ready = true;
4353 * cik_pcie_gart_disable - gart disable
4355 * @rdev: radeon_device pointer
4357 * This disables all VM page table (CIK).
4359 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4361 /* Disable all tables */
4362 WREG32(VM_CONTEXT0_CNTL, 0);
4363 WREG32(VM_CONTEXT1_CNTL, 0);
4364 /* Setup TLB control */
4365 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4366 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4367 /* Setup L2 cache */
4369 ENABLE_L2_FRAGMENT_PROCESSING |
4370 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4371 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4372 EFFECTIVE_L2_QUEUE_SIZE(7) |
4373 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4374 WREG32(VM_L2_CNTL2, 0);
4375 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4376 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4377 radeon_gart_table_vram_unpin(rdev);
4381 * cik_pcie_gart_fini - vm fini callback
4383 * @rdev: radeon_device pointer
4385 * Tears down the driver GART/VM setup (CIK).
4387 static __unused void cik_pcie_gart_fini(struct radeon_device *rdev)
4389 cik_pcie_gart_disable(rdev);
4390 radeon_gart_table_vram_free(rdev);
4391 radeon_gart_fini(rdev);
4396 * cik_ib_parse - vm ib_parse callback
4398 * @rdev: radeon_device pointer
4399 * @ib: indirect buffer pointer
4401 * CIK uses hw IB checking so this is a nop (CIK).
4403 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4410 * VMID 0 is the physical GPU addresses as used by the kernel.
4411 * VMIDs 1-15 are used for userspace clients and are handled
4412 * by the radeon vm/hsa code.
4415 * cik_vm_init - cik vm init callback
4417 * @rdev: radeon_device pointer
4419 * Inits cik specific vm parameters (number of VMs, base of vram for
4420 * VMIDs 1-15) (CIK).
4421 * Returns 0 for success.
4423 int cik_vm_init(struct radeon_device *rdev)
4426 rdev->vm_manager.nvm = 16;
4427 /* base offset of vram pages */
4428 if (rdev->flags & RADEON_IS_IGP) {
4429 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4431 rdev->vm_manager.vram_base_offset = tmp;
4433 rdev->vm_manager.vram_base_offset = 0;
4439 * cik_vm_fini - cik vm fini callback
4441 * @rdev: radeon_device pointer
4443 * Tear down any asic specific VM setup (CIK).
4445 void cik_vm_fini(struct radeon_device *rdev)
4450 * cik_vm_decode_fault - print human readable fault info
4452 * @rdev: radeon_device pointer
4453 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4454 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4456 * Print human readable fault information (CIK).
4458 static void cik_vm_decode_fault(struct radeon_device *rdev,
4459 u32 status, u32 addr, u32 mc_client)
4461 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4462 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4463 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4464 char *block = (char *)&mc_client;
4466 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4467 protections, vmid, addr,
4468 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4473 * cik_vm_flush - cik vm flush using the CP
4475 * @rdev: radeon_device pointer
4477 * Update the page table base and flush the VM TLB
4478 * using the CP (CIK).
4480 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4482 struct radeon_ring *ring = &rdev->ring[ridx];
4487 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4488 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4489 WRITE_DATA_DST_SEL(0)));
4491 radeon_ring_write(ring,
4492 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4494 radeon_ring_write(ring,
4495 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4497 radeon_ring_write(ring, 0);
4498 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4500 /* update SH_MEM_* regs */
4501 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4502 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4503 WRITE_DATA_DST_SEL(0)));
4504 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4505 radeon_ring_write(ring, 0);
4506 radeon_ring_write(ring, VMID(vm->id));
4508 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4509 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4510 WRITE_DATA_DST_SEL(0)));
4511 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4512 radeon_ring_write(ring, 0);
4514 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4515 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4516 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4517 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4519 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4520 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4521 WRITE_DATA_DST_SEL(0)));
4522 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4523 radeon_ring_write(ring, 0);
4524 radeon_ring_write(ring, VMID(0));
4527 /* We should be using the WAIT_REG_MEM packet here like in
4528 * cik_fence_ring_emit(), but it causes the CP to hang in this
4531 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4532 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4533 WRITE_DATA_DST_SEL(0)));
4534 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4535 radeon_ring_write(ring, 0);
4536 radeon_ring_write(ring, 0);
4538 /* bits 0-15 are the VM contexts0-15 */
4539 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4540 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4541 WRITE_DATA_DST_SEL(0)));
4542 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4543 radeon_ring_write(ring, 0);
4544 radeon_ring_write(ring, 1 << vm->id);
4546 /* compute doesn't have PFP */
4547 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4548 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4549 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4550 radeon_ring_write(ring, 0x0);
4555 * cik_vm_set_page - update the page tables using sDMA
4557 * @rdev: radeon_device pointer
4558 * @ib: indirect buffer to fill with commands
4559 * @pe: addr of the page entry
4560 * @addr: dst addr to write into pe
4561 * @count: number of page entries to update
4562 * @incr: increase next addr by incr bytes
4563 * @flags: access flags
4565 * Update the page tables using CP or sDMA (CIK).
/* Writes page-table entries either through the CP (WRITE_DATA packets in the
 * IB) when the gfx ring owns PT updates, or through the sDMA engine otherwise.
 * NOTE(review): several original lines (braces, the VALID/else value branches)
 * are missing from this extract; comments describe only what is visible.
 */
4567 void cik_vm_set_page(struct radeon_device *rdev,
4568 struct radeon_ib *ib,
4570 uint64_t addr, unsigned count,
4571 uint32_t incr, uint32_t flags)
/* translate generic RADEON_VM_PAGE_* flags into hw PTE bits */
4573 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
/* CP path: PT updates are submitted on the gfx ring */
4577 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
/* 2 dwords of header/addr + 2 dwords (lo/hi) per PTE */
4580 ndw = 2 + count * 2;
4584 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4585 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4586 WRITE_DATA_DST_SEL(1));
4587 ib->ptr[ib->length_dw++] = pe;
4588 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
/* one 64-bit PTE (8 bytes) emitted per iteration */
4589 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4590 if (flags & RADEON_VM_PAGE_SYSTEM) {
/* system pages go through the GART; mask to 4K-aligned address */
4591 value = radeon_vm_map_gart(rdev, addr);
4592 value &= 0xFFFFFFFFFFFFF000ULL;
4593 } else if (flags & RADEON_VM_PAGE_VALID) {
4599 value |= r600_flags;
4600 ib->ptr[ib->length_dw++] = value;
4601 ib->ptr[ib->length_dw++] = upper_32_bits(value);
/* sDMA path */
4606 if (flags & RADEON_VM_PAGE_SYSTEM) {
4612 /* for non-physically contiguous pages (system) */
4613 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4614 ib->ptr[ib->length_dw++] = pe;
4615 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4616 ib->ptr[ib->length_dw++] = ndw;
4617 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4618 if (flags & RADEON_VM_PAGE_SYSTEM) {
4619 value = radeon_vm_map_gart(rdev, addr);
4620 value &= 0xFFFFFFFFFFFFF000ULL;
4621 } else if (flags & RADEON_VM_PAGE_VALID) {
4627 value |= r600_flags;
4628 ib->ptr[ib->length_dw++] = value;
4629 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4638 if (flags & RADEON_VM_PAGE_VALID)
4642 /* for physically contiguous pages (vram) */
/* GENERATE_PTE_PDE lets the sDMA engine synthesize the PTEs itself */
4643 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4644 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4645 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4646 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4647 ib->ptr[ib->length_dw++] = 0;
4648 ib->ptr[ib->length_dw++] = value; /* value */
4649 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4650 ib->ptr[ib->length_dw++] = incr; /* increment size */
4651 ib->ptr[ib->length_dw++] = 0;
4652 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
/* sDMA IBs must be padded to a multiple of 8 dwords */
4658 while (ib->length_dw & 0x7)
4659 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4664 * cik_dma_vm_flush - cik vm flush using sDMA
4666 * @rdev: radeon_device pointer
4668 * Update the page table base and flush the VM TLB
/* Flushes the VM TLB from the sDMA ring: updates the per-VMID page-directory
 * base, programs the SH_MEM_* aperture registers for this VMID, waits for any
 * pending HDP flush, then requests a VM invalidate for this VM id.
 * NOTE(review): the vm->id < 8 / >= 8 branch lines are missing from this
 * extract; both PAGE_TABLE_BASE_ADDR writes below belong to that branch.
 */
4671 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4673 struct radeon_ring *ring = &rdev->ring[ridx];
/* POLL_REG_MEM with op=1 (memory), func=3: poll until value == reference */
4674 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4675 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
/* pick the HDP-flush-done bit matching this sDMA instance */
4681 if (ridx == R600_RING_TYPE_DMA_INDEX)
4682 ref_and_mask = SDMA0;
4684 ref_and_mask = SDMA1;
4686 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
/* contexts 0-7 and 8-15 live in two separate register banks */
4688 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4690 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4692 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4694 /* update SH_MEM_* regs */
/* select this VMID in SRBM_GFX_CNTL so the SH_MEM_* writes hit its bank */
4695 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4696 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4697 radeon_ring_write(ring, VMID(vm->id));
4699 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4700 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4701 radeon_ring_write(ring, 0);
4703 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4704 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4705 radeon_ring_write(ring, 0);
4707 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4708 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4709 radeon_ring_write(ring, 1);
4711 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4713 radeon_ring_write(ring, 0);
/* restore SRBM_GFX_CNTL to VMID 0 */
4715 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4716 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4717 radeon_ring_write(ring, VMID(0));
/* poll GPU_HDP_FLUSH_DONE until our engine's bit matches the request */
4720 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4721 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4722 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4723 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4724 radeon_ring_write(ring, ref_and_mask); /* MASK */
4725 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
/* finally invalidate the TLB for this VM id */
4728 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4729 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4730 radeon_ring_write(ring, 1 << vm->id);
4735 * The RLC is a multi-purpose microengine that handles a
4736 * variety of functions, the most important of which is
4737 * the interrupt controller.
4740 * cik_rlc_stop - stop the RLC ME
4742 * @rdev: radeon_device pointer
4744 * Halt the RLC ME (MicroEngine) (CIK).
/* Halts the RLC microengine: masks the gfx context interrupts, disables
 * coarse/fine-grain clock gating, writes RLC_CNTL = 0, then busy-waits for
 * every SE/SH serdes master and the non-CU masters to go idle.
 * NOTE(review): loop braces and the udelay/break lines are missing from
 * this extract.
 */
4746 static void cik_rlc_stop(struct radeon_device *rdev)
/* mask context busy/empty interrupts while the RLC is down */
4751 tmp = RREG32(CP_INT_CNTL_RING0);
4752 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4753 WREG32(CP_INT_CNTL_RING0, tmp);
/* repeated reads act as a settling delay on the register bus */
4755 RREG32(CB_CGTT_SCLK_CTRL);
4756 RREG32(CB_CGTT_SCLK_CTRL);
4757 RREG32(CB_CGTT_SCLK_CTRL);
4758 RREG32(CB_CGTT_SCLK_CTRL);
/* clear the CGCG/CGLS enable bits (bits 0-1) */
4760 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4761 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
/* actually halt the RLC */
4763 WREG32(RLC_CNTL, 0);
/* wait for each shader engine / shader array's serdes master to idle */
4765 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4766 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4767 cik_select_se_sh(rdev, i, j);
4768 for (k = 0; k < rdev->usec_timeout; k++) {
4769 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* broadcast to all SEs/SHs again */
4775 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4777 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4778 for (k = 0; k < rdev->usec_timeout; k++) {
4779 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4786 * cik_rlc_start - start the RLC ME
4788 * @rdev: radeon_device pointer
4790 * Unhalt the RLC ME (MicroEngine) (CIK).
/* Unhalts the RLC microengine and re-enables the gfx context
 * busy/empty interrupts that cik_rlc_stop() masked. */
4792 static void cik_rlc_start(struct radeon_device *rdev)
4796 WREG32(RLC_CNTL, RLC_ENABLE);
4798 tmp = RREG32(CP_INT_CNTL_RING0);
4799 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4800 WREG32(CP_INT_CNTL_RING0, tmp);
4806 * cik_rlc_resume - setup the RLC hw
4808 * @rdev: radeon_device pointer
4810 * Initialize the RLC registers, load the ucode,
4811 * and start the RLC (CIK).
4812 * Returns 0 for success, -EINVAL if the ucode is not available.
/* Resets the RLC, programs its load-balancing registers, uploads the
 * big-endian RLC ucode word by word, seeds the clear-state scratch info,
 * and restarts the engine. Returns 0 on success; presumably -EINVAL when
 * rlc_fw is absent or the family is unknown (those lines are missing from
 * this extract, along with the case labels of the family switch).
 */
4814 static __unused int cik_rlc_resume(struct radeon_device *rdev)
4817 u32 clear_state_info[3];
4818 const __be32 *fw_data;
/* ucode size (in dwords) differs per family */
4823 switch (rdev->family) {
4826 size = BONAIRE_RLC_UCODE_SIZE;
4829 size = KV_RLC_UCODE_SIZE;
4832 size = KB_RLC_UCODE_SIZE;
/* pulse soft reset; the reads flush the posted writes */
4838 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4839 RREG32(GRBM_SOFT_RESET);
4841 WREG32(GRBM_SOFT_RESET, 0);
4842 RREG32(GRBM_SOFT_RESET);
/* RLC load-balancer setup */
4845 WREG32(RLC_LB_CNTR_INIT, 0);
4846 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
/* broadcast to all shader engines / arrays */
4848 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4849 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4850 WREG32(RLC_LB_PARAMS, 0x00600408);
4851 WREG32(RLC_LB_CNTL, 0x80000004);
4853 WREG32(RLC_MC_CNTL, 0);
4854 WREG32(RLC_UCODE_CNTL, 0);
/* upload the ucode: firmware image is stored big-endian */
4856 fw_data = (const __be32 *)rdev->rlc_fw->data;
4857 WREG32(RLC_GPM_UCODE_ADDR, 0);
4858 for (i = 0; i < size; i++)
4859 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4860 WREG32(RLC_GPM_UCODE_ADDR, 0);
/* clear-state save/restore is not wired up yet, hence the zeroed info */
4863 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4864 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4865 clear_state_info[2] = 0;//cik_default_size;
4866 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4867 for (i = 0; i < 3; i++)
4868 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4869 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4871 cik_rlc_start(rdev);
4878 * Starting with r6xx, interrupts are handled via a ring buffer.
4879 * Ring buffers are areas of GPU accessible memory that the GPU
4880 * writes interrupt vectors into and the host reads vectors out of.
4881 * There is a rptr (read pointer) that determines where the
4882 * host is currently reading, and a wptr (write pointer)
4883 * which determines where the GPU has written. When the
4884 * pointers are equal, the ring is idle. When the GPU
4885 * writes vectors to the ring buffer, it increments the
4886 * wptr. When there is an interrupt, the host then starts
4887 * fetching commands and processing them until the pointers are
4888 * equal again at which point it updates the rptr.
4892 * cik_enable_interrupts - Enable the interrupt ring buffer
4894 * @rdev: radeon_device pointer
4896 * Enable the interrupt ring buffer (CIK).
/* Enables the IH (interrupt handler) ring buffer: sets the global interrupt
 * enable in IH_CNTL and the ring enable in IH_RB_CNTL, then records the
 * software-side enabled state. */
4898 static void cik_enable_interrupts(struct radeon_device *rdev)
4900 u32 ih_cntl = RREG32(IH_CNTL);
4901 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4903 ih_cntl |= ENABLE_INTR;
4904 ih_rb_cntl |= IH_RB_ENABLE;
4905 WREG32(IH_CNTL, ih_cntl);
4906 WREG32(IH_RB_CNTL, ih_rb_cntl);
4907 rdev->ih.enabled = true;
4911 * cik_disable_interrupts - Disable the interrupt ring buffer
4913 * @rdev: radeon_device pointer
4915 * Disable the interrupt ring buffer (CIK).
/* Disables the IH ring buffer (inverse of cik_enable_interrupts), resets
 * the ring read/write pointers to 0, and records the disabled state. */
4917 static void cik_disable_interrupts(struct radeon_device *rdev)
4919 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4920 u32 ih_cntl = RREG32(IH_CNTL);
4922 ih_rb_cntl &= ~IH_RB_ENABLE;
4923 ih_cntl &= ~ENABLE_INTR;
4924 WREG32(IH_RB_CNTL, ih_rb_cntl);
4925 WREG32(IH_CNTL, ih_cntl);
4926 /* set rptr, wptr to 0 */
4927 WREG32(IH_RB_RPTR, 0);
4928 WREG32(IH_RB_WPTR, 0);
4929 rdev->ih.enabled = false;
4934 * cik_disable_interrupt_state - Disable all interrupt sources
4936 * @rdev: radeon_device pointer
4938 * Clear all interrupt enable bits used by the driver (CIK).
/* Masks every interrupt source the driver uses: gfx ring, both sDMA engines,
 * all eight compute pipes, GRBM, all CRTC vblank/vline sources, DAC
 * autodetect, and the six HPD pads (preserving only their polarity bits). */
4940 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* gfx ring: keep only the context busy/empty enables */
4945 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
/* sDMA trap interrupts off on both instances */
4947 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4948 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4949 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4950 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4951 /* compute queues */
4952 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4953 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4954 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4955 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4956 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4957 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4958 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4959 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4961 WREG32(GRBM_INT_CNTL, 0);
4962 /* vline/vblank, etc. */
4963 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4964 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4965 if (rdev->num_crtc >= 4) {
4966 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4967 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4969 if (rdev->num_crtc >= 6) {
4970 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4971 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4975 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4977 /* digital hotplug */
/* keep only the polarity bit; clears enable + pending state */
4978 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4979 WREG32(DC_HPD1_INT_CONTROL, tmp);
4980 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4981 WREG32(DC_HPD2_INT_CONTROL, tmp);
4982 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4983 WREG32(DC_HPD3_INT_CONTROL, tmp);
4984 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4985 WREG32(DC_HPD4_INT_CONTROL, tmp);
4986 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4987 WREG32(DC_HPD5_INT_CONTROL, tmp);
4988 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4989 WREG32(DC_HPD6_INT_CONTROL, tmp);
4994 * cik_irq_init - init and enable the interrupt ring
4996 * @rdev: radeon_device pointer
4998 * Allocate a ring buffer for the interrupt controller,
4999 * enable the RLC, disable interrupts, enable the IH
5000 * ring buffer and enable it (CIK).
5001 * Called at device load and resume.
5002 * Returns 0 for success, errors for failure.
/* Allocates the IH ring, brings up the RLC, programs the ring base/size,
 * writeback address and IH_CNTL defaults, masks all sources, enables PCI
 * bus mastering, and finally turns the IH ring on.
 * NOTE(review): the error-return lines after the two init calls are missing
 * from this extract. */
5004 static __unused int cik_irq_init(struct radeon_device *rdev)
5008 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* allocate the ring buffer first; failure path lines not visible here */
5011 ret = r600_ih_ring_alloc(rdev);
/* disable irqs while we reconfigure */
5016 cik_disable_interrupts(rdev);
/* init rlc; on failure the ring is freed (see r600_ih_ring_fini below) */
5019 ret = cik_rlc_resume(rdev);
5021 r600_ih_ring_fini(rdev);
5025 /* setup interrupt control */
5026 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5027 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5028 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5029 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5030 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5032 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5033 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5034 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5035 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5037 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* ring size encoded as log2 of the dword count */
5038 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5040 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5041 IH_WPTR_OVERFLOW_CLEAR |
/* use writeback for the wptr when the writeback page is available */
5044 if (rdev->wb.enabled)
5045 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5047 /* set the writeback address whether it's enabled or not */
5048 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5049 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5051 WREG32(IH_RB_CNTL, ih_rb_cntl);
5053 /* set rptr, wptr to 0 */
5054 WREG32(IH_RB_RPTR, 0);
5055 WREG32(IH_RB_WPTR, 0);
5057 /* Default settings for IH_CNTL (disabled at first) */
5058 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5059 /* RPTR_REARM only works if msi's are enabled */
5060 if (rdev->msi_enabled)
5061 ih_cntl |= RPTR_REARM;
5062 WREG32(IH_CNTL, ih_cntl);
5064 /* force the active interrupt state to all disabled */
5065 cik_disable_interrupt_state(rdev);
5067 pci_enable_busmaster(rdev->dev);
5070 cik_enable_interrupts(rdev);
5076 * cik_irq_set - enable/disable interrupt sources
5078 * @rdev: radeon_device pointer
5080 * Enable interrupt sources on the GPU (vblanks, hpd,
5082 * Returns 0 for success, errors for failure.
5084 int cik_irq_set(struct radeon_device *rdev)
5086 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5087 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5088 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5089 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5090 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5091 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5092 u32 grbm_int_cntl = 0;
5093 u32 dma_cntl, dma_cntl1;
5095 if (!rdev->irq.installed) {
5096 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5099 /* don't enable anything if the ih is disabled */
5100 if (!rdev->ih.enabled) {
5101 cik_disable_interrupts(rdev);
5102 /* force the active interrupt state to all disabled */
5103 cik_disable_interrupt_state(rdev);
5107 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5109 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5110 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5111 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5112 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5114 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5115 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5117 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5121 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5122 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5126 /* enable CP interrupts on all rings */
5127 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5128 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5129 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5131 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5132 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5133 DRM_DEBUG("si_irq_set: sw int cp1\n");
5134 if (ring->me == 1) {
5135 switch (ring->pipe) {
5137 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5140 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5143 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5146 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5149 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5152 } else if (ring->me == 2) {
5153 switch (ring->pipe) {
5155 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5158 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5161 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5164 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5167 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5171 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5174 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5175 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5176 DRM_DEBUG("si_irq_set: sw int cp2\n");
5177 if (ring->me == 1) {
5178 switch (ring->pipe) {
5180 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5183 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5186 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5189 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5192 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5195 } else if (ring->me == 2) {
5196 switch (ring->pipe) {
5198 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5201 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5204 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5207 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5210 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5214 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
5218 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5219 DRM_DEBUG("cik_irq_set: sw int dma\n");
5220 dma_cntl |= TRAP_ENABLE;
5223 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5224 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5225 dma_cntl1 |= TRAP_ENABLE;
5228 if (rdev->irq.crtc_vblank_int[0] ||
5229 atomic_read(&rdev->irq.pflip[0])) {
5230 DRM_DEBUG("cik_irq_set: vblank 0\n");
5231 crtc1 |= VBLANK_INTERRUPT_MASK;
5233 if (rdev->irq.crtc_vblank_int[1] ||
5234 atomic_read(&rdev->irq.pflip[1])) {
5235 DRM_DEBUG("cik_irq_set: vblank 1\n");
5236 crtc2 |= VBLANK_INTERRUPT_MASK;
5238 if (rdev->irq.crtc_vblank_int[2] ||
5239 atomic_read(&rdev->irq.pflip[2])) {
5240 DRM_DEBUG("cik_irq_set: vblank 2\n");
5241 crtc3 |= VBLANK_INTERRUPT_MASK;
5243 if (rdev->irq.crtc_vblank_int[3] ||
5244 atomic_read(&rdev->irq.pflip[3])) {
5245 DRM_DEBUG("cik_irq_set: vblank 3\n");
5246 crtc4 |= VBLANK_INTERRUPT_MASK;
5248 if (rdev->irq.crtc_vblank_int[4] ||
5249 atomic_read(&rdev->irq.pflip[4])) {
5250 DRM_DEBUG("cik_irq_set: vblank 4\n");
5251 crtc5 |= VBLANK_INTERRUPT_MASK;
5253 if (rdev->irq.crtc_vblank_int[5] ||
5254 atomic_read(&rdev->irq.pflip[5])) {
5255 DRM_DEBUG("cik_irq_set: vblank 5\n");
5256 crtc6 |= VBLANK_INTERRUPT_MASK;
5258 if (rdev->irq.hpd[0]) {
5259 DRM_DEBUG("cik_irq_set: hpd 1\n");
5260 hpd1 |= DC_HPDx_INT_EN;
5262 if (rdev->irq.hpd[1]) {
5263 DRM_DEBUG("cik_irq_set: hpd 2\n");
5264 hpd2 |= DC_HPDx_INT_EN;
5266 if (rdev->irq.hpd[2]) {
5267 DRM_DEBUG("cik_irq_set: hpd 3\n");
5268 hpd3 |= DC_HPDx_INT_EN;
5270 if (rdev->irq.hpd[3]) {
5271 DRM_DEBUG("cik_irq_set: hpd 4\n");
5272 hpd4 |= DC_HPDx_INT_EN;
5274 if (rdev->irq.hpd[4]) {
5275 DRM_DEBUG("cik_irq_set: hpd 5\n");
5276 hpd5 |= DC_HPDx_INT_EN;
5278 if (rdev->irq.hpd[5]) {
5279 DRM_DEBUG("cik_irq_set: hpd 6\n");
5280 hpd6 |= DC_HPDx_INT_EN;
5283 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5285 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5286 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5288 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5289 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5290 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5291 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5292 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5293 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5294 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5295 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5297 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5300 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5301 if (rdev->num_crtc >= 4) {
5302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5303 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5305 if (rdev->num_crtc >= 6) {
5306 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5307 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5310 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5311 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5312 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5313 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5314 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5315 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5321 * cik_irq_ack - ack interrupt sources
5323 * @rdev: radeon_device pointer
5325 * Ack interrupt sources on the GPU (vblanks, hpd,
5326 * etc.) (CIK). Certain interrupts sources are sw
5327 * generated and do not require an explicit ack.
5329 static inline void cik_irq_ack(struct radeon_device *rdev)
5333 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5334 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5335 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5336 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5337 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5338 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5339 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5341 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5342 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5343 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5344 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5345 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5346 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5347 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5348 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5350 if (rdev->num_crtc >= 4) {
5351 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5352 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5353 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5354 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5355 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5356 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5357 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5358 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5361 if (rdev->num_crtc >= 6) {
5362 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5363 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5364 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5365 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5366 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5367 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5368 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5369 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5372 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5373 tmp = RREG32(DC_HPD1_INT_CONTROL);
5374 tmp |= DC_HPDx_INT_ACK;
5375 WREG32(DC_HPD1_INT_CONTROL, tmp);
5377 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5378 tmp = RREG32(DC_HPD2_INT_CONTROL);
5379 tmp |= DC_HPDx_INT_ACK;
5380 WREG32(DC_HPD2_INT_CONTROL, tmp);
5382 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5383 tmp = RREG32(DC_HPD3_INT_CONTROL);
5384 tmp |= DC_HPDx_INT_ACK;
5385 WREG32(DC_HPD3_INT_CONTROL, tmp);
5387 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5388 tmp = RREG32(DC_HPD4_INT_CONTROL);
5389 tmp |= DC_HPDx_INT_ACK;
5390 WREG32(DC_HPD4_INT_CONTROL, tmp);
5392 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5393 tmp = RREG32(DC_HPD5_INT_CONTROL);
5394 tmp |= DC_HPDx_INT_ACK;
5395 WREG32(DC_HPD5_INT_CONTROL, tmp);
5397 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5398 tmp = RREG32(DC_HPD5_INT_CONTROL);
5399 tmp |= DC_HPDx_INT_ACK;
5400 WREG32(DC_HPD6_INT_CONTROL, tmp);
5405 * cik_irq_disable - disable interrupts
5407 * @rdev: radeon_device pointer
5409 * Disable interrupts on the hw (CIK).
/* Disables the IH ring, then forces every interrupt source off so nothing
 * fires while the hw is quiesced. (A settle delay between the two calls is
 * present in the original but not visible in this extract.) */
5411 static void cik_irq_disable(struct radeon_device *rdev)
5413 cik_disable_interrupts(rdev);
5414 /* Wait and acknowledge irq */
5417 cik_disable_interrupt_state(rdev);
5421 * cik_irq_suspend - disable interrupts for suspend
5423 * @rdev: radeon_device pointer
5425 * Disable interrupts and stop the RLC (CIK).
/* Suspend-time teardown: disable all interrupts; presumably also stops the
 * RLC (cik_rlc_stop call not visible in this extract - confirm upstream). */
5428 static void cik_irq_suspend(struct radeon_device *rdev)
5430 cik_irq_disable(rdev);
5435 * cik_irq_fini - tear down interrupt support
5437 * @rdev: radeon_device pointer
5439 * Disable interrupts on the hw and free the IH ring
5441 * Used for driver unload.
/* Driver-unload teardown: suspend interrupt delivery, then free the IH
 * ring buffer. */
5443 static __unused void cik_irq_fini(struct radeon_device *rdev)
5445 cik_irq_suspend(rdev);
5446 r600_ih_ring_fini(rdev);
5450 * cik_get_ih_wptr - get the IH ring buffer wptr
5452 * @rdev: radeon_device pointer
5454 * Get the IH ring buffer wptr from either the register
5455 * or the writeback memory buffer (CIK). Also check for
5456 * ring buffer overflow and deal with it.
5457 * Used by cik_irq_process().
5458 * Returns the value of the wptr.
/* Returns the current IH ring write pointer, preferring the writeback copy
 * when enabled. On overflow, advances rptr past the clobbered entries and
 * clears the overflow bit. */
5460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5464 if (rdev->wb.enabled)
5465 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5467 wptr = RREG32(IH_RB_WPTR);
5469 if (wptr & RB_OVERFLOW) {
5470 /* When a ring buffer overflow happen start parsing interrupt
5471 * from the last not overwritten vector (wptr + 16). Hopefully
5472 * this should allow us to catchup.
/* NOTE(review): the warn prints (wptr + 16) + ptr_mask, while the rptr
 * assignment below correctly masks with &. The log value looks like a
 * typo for & - message-only, does not affect behavior. */
5474 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5475 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5476 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* clear the overflow flag so subsequent reads are valid again */
5477 tmp = RREG32(IH_RB_CNTL);
5478 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5479 WREG32(IH_RB_CNTL, tmp);
5481 return (wptr & rdev->ih.ptr_mask);
5485 * Each IV ring entry is 128 bits:
5486 * [7:0] - interrupt source id
5488 * [59:32] - interrupt source data
5489 * [63:60] - reserved
5492 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5493 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5494 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5495 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5496 * PIPE_ID - ME0 0=3D
5497 * - ME1&2 compute dispatcher (4 pipes each)
5499 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5500 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5501 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5504 * [127:96] - reserved
5507 * cik_irq_process - interrupt handler
5509 * @rdev: radeon_device pointer
5511 * Interrupt handler (CIK). Walk the IH ring,
5512 * ack interrupts and schedule work to handle
5514 * Returns irq process return code.
5516 irqreturn_t cik_irq_process(struct radeon_device *rdev)
5518 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5519 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5522 u32 src_id, src_data, ring_id;
5523 u8 me_id, pipe_id, queue_id;
5525 bool queue_hotplug = false;
5526 bool queue_reset = false;
5527 u32 addr, status, mc_client;
5529 if (!rdev->ih.enabled || rdev->shutdown)
5532 wptr = cik_get_ih_wptr(rdev);
5535 /* is somebody else already processing irqs? */
5536 if (atomic_xchg(&rdev->ih.lock, 1))
5539 rptr = rdev->ih.rptr;
5540 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5542 /* Order reading of wptr vs. reading of IH ring data */
5545 /* display interrupts */
5548 while (rptr != wptr) {
5549 /* wptr/rptr are in bytes! */
5550 ring_index = rptr / 4;
5551 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5552 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5553 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5556 case 1: /* D1 vblank/vline */
5558 case 0: /* D1 vblank */
5559 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5560 if (rdev->irq.crtc_vblank_int[0]) {
5561 drm_handle_vblank(rdev->ddev, 0);
5562 rdev->pm.vblank_sync = true;
5563 wake_up(&rdev->irq.vblank_queue);
5565 if (atomic_read(&rdev->irq.pflip[0]))
5566 radeon_crtc_handle_flip(rdev, 0);
5567 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5568 DRM_DEBUG("IH: D1 vblank\n");
5571 case 1: /* D1 vline */
5572 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5573 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5574 DRM_DEBUG("IH: D1 vline\n");
5578 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5582 case 2: /* D2 vblank/vline */
5584 case 0: /* D2 vblank */
5585 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5586 if (rdev->irq.crtc_vblank_int[1]) {
5587 drm_handle_vblank(rdev->ddev, 1);
5588 rdev->pm.vblank_sync = true;
5589 wake_up(&rdev->irq.vblank_queue);
5591 if (atomic_read(&rdev->irq.pflip[1]))
5592 radeon_crtc_handle_flip(rdev, 1);
5593 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5594 DRM_DEBUG("IH: D2 vblank\n");
5597 case 1: /* D2 vline */
5598 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5599 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5600 DRM_DEBUG("IH: D2 vline\n");
5604 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5608 case 3: /* D3 vblank/vline */
5610 case 0: /* D3 vblank */
5611 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5612 if (rdev->irq.crtc_vblank_int[2]) {
5613 drm_handle_vblank(rdev->ddev, 2);
5614 rdev->pm.vblank_sync = true;
5615 wake_up(&rdev->irq.vblank_queue);
5617 if (atomic_read(&rdev->irq.pflip[2]))
5618 radeon_crtc_handle_flip(rdev, 2);
5619 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5620 DRM_DEBUG("IH: D3 vblank\n");
5623 case 1: /* D3 vline */
5624 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5625 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5626 DRM_DEBUG("IH: D3 vline\n");
5630 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5634 case 4: /* D4 vblank/vline */
5636 case 0: /* D4 vblank */
5637 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5638 if (rdev->irq.crtc_vblank_int[3]) {
5639 drm_handle_vblank(rdev->ddev, 3);
5640 rdev->pm.vblank_sync = true;
5641 wake_up(&rdev->irq.vblank_queue);
5643 if (atomic_read(&rdev->irq.pflip[3]))
5644 radeon_crtc_handle_flip(rdev, 3);
5645 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5646 DRM_DEBUG("IH: D4 vblank\n");
5649 case 1: /* D4 vline */
5650 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5651 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5652 DRM_DEBUG("IH: D4 vline\n");
5656 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5660 case 5: /* D5 vblank/vline */
5662 case 0: /* D5 vblank */
5663 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5664 if (rdev->irq.crtc_vblank_int[4]) {
5665 drm_handle_vblank(rdev->ddev, 4);
5666 rdev->pm.vblank_sync = true;
5667 wake_up(&rdev->irq.vblank_queue);
5669 if (atomic_read(&rdev->irq.pflip[4]))
5670 radeon_crtc_handle_flip(rdev, 4);
5671 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5672 DRM_DEBUG("IH: D5 vblank\n");
5675 case 1: /* D5 vline */
5676 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5677 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5678 DRM_DEBUG("IH: D5 vline\n");
5682 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5686 case 6: /* D6 vblank/vline */
5688 case 0: /* D6 vblank */
5689 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5690 if (rdev->irq.crtc_vblank_int[5]) {
5691 drm_handle_vblank(rdev->ddev, 5);
5692 rdev->pm.vblank_sync = true;
5693 wake_up(&rdev->irq.vblank_queue);
5695 if (atomic_read(&rdev->irq.pflip[5]))
5696 radeon_crtc_handle_flip(rdev, 5);
5697 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5698 DRM_DEBUG("IH: D6 vblank\n");
5701 case 1: /* D6 vline */
5702 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5703 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5704 DRM_DEBUG("IH: D6 vline\n");
5708 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5712 case 42: /* HPD hotplug */
5715 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5716 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5717 queue_hotplug = true;
5718 DRM_DEBUG("IH: HPD1\n");
5722 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5723 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5724 queue_hotplug = true;
5725 DRM_DEBUG("IH: HPD2\n");
5729 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5730 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5731 queue_hotplug = true;
5732 DRM_DEBUG("IH: HPD3\n");
5736 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5737 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5738 queue_hotplug = true;
5739 DRM_DEBUG("IH: HPD4\n");
5743 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5744 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5745 queue_hotplug = true;
5746 DRM_DEBUG("IH: HPD5\n");
5750 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5751 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5752 queue_hotplug = true;
5753 DRM_DEBUG("IH: HPD6\n");
5757 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5763 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5764 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5765 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5766 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5767 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5769 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5771 cik_vm_decode_fault(rdev, status, addr, mc_client);
5772 /* reset addr and status */
5773 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5775 case 176: /* GFX RB CP_INT */
5776 case 177: /* GFX IB CP_INT */
5777 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5779 case 181: /* CP EOP event */
5780 DRM_DEBUG("IH: CP EOP\n");
5781 /* XXX check the bitfield order! */
5782 me_id = (ring_id & 0x60) >> 5;
5783 pipe_id = (ring_id & 0x18) >> 3;
5784 queue_id = (ring_id & 0x7) >> 0;
5787 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5791 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
5792 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5793 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
5794 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5798 case 184: /* CP Privileged reg access */
5799 DRM_ERROR("Illegal register access in command stream\n");
5800 /* XXX check the bitfield order! */
5801 me_id = (ring_id & 0x60) >> 5;
5802 pipe_id = (ring_id & 0x18) >> 3;
5803 queue_id = (ring_id & 0x7) >> 0;
5806 /* This results in a full GPU reset, but all we need to do is soft
5807 * reset the CP for gfx
5821 case 185: /* CP Privileged inst */
5822 DRM_ERROR("Illegal instruction in command stream\n");
5823 /* XXX check the bitfield order! */
5824 me_id = (ring_id & 0x60) >> 5;
5825 pipe_id = (ring_id & 0x18) >> 3;
5826 queue_id = (ring_id & 0x7) >> 0;
5829 /* This results in a full GPU reset, but all we need to do is soft
5830 * reset the CP for gfx
5844 case 224: /* SDMA trap event */
5845 /* XXX check the bitfield order! */
5846 me_id = (ring_id & 0x3) >> 0;
5847 queue_id = (ring_id & 0xc) >> 2;
5848 DRM_DEBUG("IH: SDMA trap\n");
5853 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5866 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5878 case 241: /* SDMA Privileged inst */
5879 case 247: /* SDMA Privileged inst */
5880 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5881 /* XXX check the bitfield order! */
5882 me_id = (ring_id & 0x3) >> 0;
5883 queue_id = (ring_id & 0xc) >> 2;
5917 case 233: /* GUI IDLE */
5918 DRM_DEBUG("IH: GUI idle\n");
5921 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5925 /* wptr/rptr are in bytes! */
5927 rptr &= rdev->ih.ptr_mask;
5930 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
5932 taskqueue_enqueue(rdev->tq, &rdev->reset_work);
5933 rdev->ih.rptr = rptr;
5934 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5935 atomic_set(&rdev->ih.lock, 0);
5937 /* make sure wptr hasn't changed while processing */
5938 wptr = cik_get_ih_wptr(rdev);
5946 * startup/shutdown callbacks
5949 * cik_startup - program the asic to a functional state
5951 * @rdev: radeon_device pointer
5953 * Programs the asic to a functional state (CIK).
5954 * Called by cik_init() and cik_resume().
5955 * Returns 0 for success, error for failure.
5957 static int cik_startup(struct radeon_device *rdev)
5959 struct radeon_ring *ring;
5962 cik_mc_program(rdev);
5964 if (rdev->flags & RADEON_IS_IGP) {
5965 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5966 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5967 r = cik_init_microcode(rdev);
5969 DRM_ERROR("Failed to load firmware!\n");
5974 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5975 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5977 r = cik_init_microcode(rdev);
5979 DRM_ERROR("Failed to load firmware!\n");
5984 r = ci_mc_load_microcode(rdev);
5986 DRM_ERROR("Failed to load MC firmware!\n");
5991 r = r600_vram_scratch_init(rdev);
5995 r = cik_pcie_gart_enable(rdev);
6000 /* allocate rlc buffers */
6001 r = si_rlc_init(rdev);
6003 DRM_ERROR("Failed to init rlc BOs!\n");
6007 /* allocate wb buffer */
6008 r = radeon_wb_init(rdev);
6012 /* allocate mec buffers */
6013 r = cik_mec_init(rdev);
6015 DRM_ERROR("Failed to init MEC BOs!\n");
6019 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6021 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6025 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6027 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6031 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6033 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6037 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6039 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6043 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6045 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6049 r = cik_uvd_resume(rdev);
6051 r = radeon_fence_driver_start_ring(rdev,
6052 R600_RING_TYPE_UVD_INDEX);
6054 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6057 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6060 if (!rdev->irq.installed) {
6061 r = radeon_irq_kms_init(rdev);
6066 r = cik_irq_init(rdev);
6068 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6069 radeon_irq_kms_fini(rdev);
6074 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6075 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6076 CP_RB0_RPTR, CP_RB0_WPTR,
6077 0, 0xfffff, RADEON_CP_PACKET2);
6081 /* set up the compute queues */
6082 /* type-2 packets are deprecated on MEC, use type-3 instead */
6083 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6084 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6085 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6086 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6089 ring->me = 1; /* first MEC */
6090 ring->pipe = 0; /* first pipe */
6091 ring->queue = 0; /* first queue */
6092 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6094 /* type-2 packets are deprecated on MEC, use type-3 instead */
6095 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6096 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6097 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6098 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6101 /* dGPU only have 1 MEC */
6102 ring->me = 1; /* first MEC */
6103 ring->pipe = 0; /* first pipe */
6104 ring->queue = 1; /* second queue */
6105 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6107 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6108 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6109 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6110 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6111 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6115 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6116 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6117 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6118 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6119 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6123 r = cik_cp_resume(rdev);
6127 r = cik_sdma_resume(rdev);
6131 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6132 if (ring->ring_size) {
6133 r = radeon_ring_init(rdev, ring, ring->ring_size,
6134 R600_WB_UVD_RPTR_OFFSET,
6135 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6136 0, 0xfffff, RADEON_CP_PACKET2);
6138 r = r600_uvd_init(rdev);
6140 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6143 r = radeon_ib_pool_init(rdev);
6145 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6149 r = radeon_vm_manager_init(rdev);
6151 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6159 * cik_resume - resume the asic to a functional state
6161 * @rdev: radeon_device pointer
6163 * Programs the asic to a functional state (CIK).
6165 * Returns 0 for success, error for failure.
6167 int cik_resume(struct radeon_device *rdev)
/* Re-post the card via the ATOM BIOS tables after suspend. */
6172 atom_asic_init(rdev->mode_info.atom_context);
6174 /* init golden registers */
6175 cik_init_golden_registers(rdev);
/* accel_working is set optimistically and cleared if startup fails. */
6177 rdev->accel_working = true;
6178 r = cik_startup(rdev);
6180 DRM_ERROR("cik startup failed on resume\n");
6181 rdev->accel_working = false;
6190 * cik_suspend - suspend the asic
6192 * @rdev: radeon_device pointer
6194 * Bring the chip into a state suitable for suspend (CIK).
6195 * Called at suspend.
6196 * Returns 0 for success.
6198 int cik_suspend(struct radeon_device *rdev)
/* Quiesce in roughly the reverse order of cik_startup():
 * VM manager, CP, SDMA, UVD, IRQs, writeback, then GART.
 */
6200 radeon_vm_manager_fini(rdev);
6201 cik_cp_enable(rdev, false);
6202 cik_sdma_enable(rdev, false);
6203 r600_uvd_stop(rdev);
6204 radeon_uvd_suspend(rdev);
6205 cik_irq_suspend(rdev);
6206 radeon_wb_disable(rdev);
6207 cik_pcie_gart_disable(rdev);
6211 /* Plan is to move initialization in that function and use
6212 * helper function so that radeon_device_init pretty much
6213 * do nothing more than calling asic specific function. This
6214 * should also allow to remove a bunch of callback function
6218 * cik_init - asic specific driver and hw init
6220 * @rdev: radeon_device pointer
6222 * Setup asic specific driver variables and program the hw
6223 * to a functional state (CIK).
6224 * Called at driver startup.
6225 * Returns 0 for success, errors for failure.
6227 int cik_init(struct radeon_device *rdev)
6229 struct radeon_ring *ring;
/* A BIOS is required to bring the ASIC up. */
6233 if (!radeon_get_bios(rdev)) {
6234 if (ASIC_IS_AVIVO(rdev))
6237 /* Must be an ATOMBIOS */
6238 if (!rdev->is_atom_bios) {
/* NOTE(review): message says "cayman" but this is the CIK init path —
 * looks like copy/paste from the NI code; confirm before changing. */
6239 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6242 r = radeon_atombios_init(rdev);
6246 /* Post card if necessary */
6247 if (!radeon_card_posted(rdev)) {
6249 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6252 DRM_INFO("GPU not posted. posting now...\n");
6253 atom_asic_init(rdev->mode_info.atom_context);
6255 /* init golden registers */
6256 cik_init_golden_registers(rdev);
6257 /* Initialize scratch registers */
6258 cik_scratch_init(rdev);
6259 /* Initialize surface registers */
6260 radeon_surface_init(rdev);
6261 /* Initialize clocks */
6262 radeon_get_clock_info(rdev->ddev);
6265 r = radeon_fence_driver_init(rdev);
6269 /* initialize memory controller */
6270 r = cik_mc_init(rdev);
6273 /* Memory manager */
6274 r = radeon_bo_init(rdev);
/* Pre-size each ring; actual HW programming happens in cik_startup().
 * GFX and the two compute rings get 1 MB; SDMA rings 256 KB; UVD 4 KB.
 */
6278 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6279 ring->ring_obj = NULL;
6280 r600_ring_init(rdev, ring, 1024 * 1024);
6282 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6283 ring->ring_obj = NULL;
6284 r600_ring_init(rdev, ring, 1024 * 1024);
/* Compute rings need a doorbell page for user-space wptr updates. */
6285 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6289 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6290 ring->ring_obj = NULL;
6291 r600_ring_init(rdev, ring, 1024 * 1024);
6292 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6296 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6297 ring->ring_obj = NULL;
6298 r600_ring_init(rdev, ring, 256 * 1024);
6300 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6301 ring->ring_obj = NULL;
6302 r600_ring_init(rdev, ring, 256 * 1024);
6304 r = radeon_uvd_init(rdev);
6306 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6307 ring->ring_obj = NULL;
6308 r600_ring_init(rdev, ring, 4096);
/* Interrupt handler ring (IH): 64 KB. */
6311 rdev->ih.ring_obj = NULL;
6312 r600_ih_ring_init(rdev, 64 * 1024);
6314 r = r600_pcie_gart_init(rdev);
6318 rdev->accel_working = true;
6319 r = cik_startup(rdev);
/* On startup failure tear down what cik_startup() created and fall
 * back to unaccelerated operation instead of failing the whole init.
 */
6321 dev_err(rdev->dev, "disabling GPU acceleration\n");
6323 cik_sdma_fini(rdev);
6327 radeon_wb_fini(rdev);
6328 radeon_ib_pool_fini(rdev);
6329 radeon_vm_manager_fini(rdev);
6330 radeon_irq_kms_fini(rdev);
6331 cik_pcie_gart_fini(rdev);
6332 rdev->accel_working = false;
6335 /* Don't start up if the MC ucode is missing.
6336 * The default clocks and voltages before the MC ucode
6337 * is loaded are not sufficient for advanced operations.
6339 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
/* NOTE(review): error text says "NI+" in the CIK init path —
 * likely copied from the NI driver; confirm before changing. */
6340 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6348 * cik_fini - asic specific driver and hw fini
6350 * @rdev: radeon_device pointer
6352 * Tear down the asic specific driver variables and program the hw
6353 * to an idle state (CIK).
6354 * Called at driver unload.
6356 void cik_fini(struct radeon_device *rdev)
/* Full teardown at driver unload: stop engines, then free software
 * state in reverse order of cik_init()/cik_startup().
 */
6359 cik_sdma_fini(rdev);
6363 radeon_wb_fini(rdev);
6364 radeon_vm_manager_fini(rdev);
6365 radeon_ib_pool_fini(rdev);
6366 radeon_irq_kms_fini(rdev);
6367 r600_uvd_stop(rdev);
6368 radeon_uvd_fini(rdev);
6369 cik_pcie_gart_fini(rdev);
6370 r600_vram_scratch_fini(rdev);
6371 radeon_gem_fini(rdev);
6372 radeon_fence_driver_fini(rdev);
6373 radeon_bo_fini(rdev);
6374 radeon_atombios_fini(rdev);
6379 /* display watermark setup */
6381 * dce8_line_buffer_adjust - Set up the line buffer
6383 * @rdev: radeon_device pointer
6384 * @radeon_crtc: the selected display controller
6385 * @mode: the current display mode on the selected display
6388 * Setup up the line buffer allocation for
6389 * the selected display controller (CIK).
6390 * Returns the line buffer size in pixels.
6392 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6393 struct radeon_crtc *radeon_crtc,
6394 struct drm_display_mode *mode)
6400 * There are 6 line buffers, one for each display controller.
6401 * There are 3 partitions per LB. Select the number of partitions
6402 * to enable based on the display width. For display widths larger
6403 * than 4096, you need to use 2 display controllers and combine
6404 * them using the stereo blender.
6406 if (radeon_crtc->base.enabled && mode) {
/* Partition count scales with horizontal resolution thresholds. */
6407 if (mode->crtc_hdisplay < 1920)
6409 else if (mode->crtc_hdisplay < 2560)
6411 else if (mode->crtc_hdisplay < 4096)
6414 DRM_DEBUG_KMS("Mode too big for LB!\n");
/* Program the LB allocation for this CRTC. */
6420 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6421 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6423 if (radeon_crtc->base.enabled && mode) {
6435 /* controller not enabled, so no lb used */
6440 * cik_get_number_of_dram_channels - get the number of dram channels
6442 * @rdev: radeon_device pointer
6444 * Look up the number of video ram channels (CIK).
6445 * Used for display watermark bandwidth calculations
6446 * Returns the number of dram channels
6448 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
/* Decode the channel count from the MC_SHARED_CHMAP NOOFCHAN field. */
6450 u32 tmp = RREG32(MC_SHARED_CHMAP);
6452 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Input parameters for the DCE8 display watermark calculations below. */
6475 struct dce8_wm_params {
6476 u32 dram_channels; /* number of dram channels */
6477 u32 yclk; /* bandwidth per dram data pin in kHz */
6478 u32 sclk; /* engine clock in kHz */
6479 u32 disp_clk; /* display clock in kHz */
6480 u32 src_width; /* viewport width */
6481 u32 active_time; /* active display time in ns */
6482 u32 blank_time; /* blank time in ns */
6483 bool interlaced; /* mode is interlaced */
6484 fixed20_12 vsc; /* vertical scale ratio */
6485 u32 num_heads; /* number of active crtcs */
6486 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6487 u32 lb_size; /* line buffer allocated to pipe */
6488 u32 vtaps; /* vertical scaler taps */
6494 * @wm: watermark calculation data
6496 * Calculate the raw dram bandwidth (CIK).
6497 * Used for display watermark bandwidth calculations
6498 * Returns the dram bandwidth in MBytes/s
6500 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6502 /* Calculate raw DRAM Bandwidth */
6503 fixed20_12 dram_efficiency; /* 0.7 */
6504 fixed20_12 yclk, dram_channels, bandwidth;
/* bandwidth = yclk(MHz) * channels * 4 bytes * 0.7 efficiency,
 * computed in 20.12 fixed point.
 */
6507 a.full = dfixed_const(1000);
6508 yclk.full = dfixed_const(wm->yclk);
6509 yclk.full = dfixed_div(yclk, a);
6510 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6511 a.full = dfixed_const(10);
6512 dram_efficiency.full = dfixed_const(7);
6513 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6514 bandwidth.full = dfixed_mul(dram_channels, yclk);
6515 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6517 return dfixed_trunc(bandwidth);
6521 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6523 * @wm: watermark calculation data
6525 * Calculate the dram bandwidth used for display (CIK).
6526 * Used for display watermark bandwidth calculations
6527 * Returns the dram bandwidth for display in MBytes/s
6529 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6531 /* Calculate DRAM Bandwidth and the part allocated to display. */
6532 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6533 fixed20_12 yclk, dram_channels, bandwidth;
/* Same formula as dce8_dram_bandwidth() but with the conservative
 * 0.3 display allocation factor instead of 0.7 efficiency.
 */
6536 a.full = dfixed_const(1000);
6537 yclk.full = dfixed_const(wm->yclk);
6538 yclk.full = dfixed_div(yclk, a);
6539 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6540 a.full = dfixed_const(10);
6541 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
6542 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6543 bandwidth.full = dfixed_mul(dram_channels, yclk);
6544 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6546 return dfixed_trunc(bandwidth);
6550 * dce8_data_return_bandwidth - get the data return bandwidth
6552 * @wm: watermark calculation data
6554 * Calculate the data return bandwidth used for display (CIK).
6555 * Used for display watermark bandwidth calculations
6556 * Returns the data return bandwidth in MBytes/s
6558 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6560 /* Calculate the display Data return Bandwidth */
6561 fixed20_12 return_efficiency; /* 0.8 */
6562 fixed20_12 sclk, bandwidth;
/* bandwidth = sclk(MHz) * 32 bytes * 0.8 efficiency. */
6565 a.full = dfixed_const(1000);
6566 sclk.full = dfixed_const(wm->sclk);
6567 sclk.full = dfixed_div(sclk, a);
6568 a.full = dfixed_const(10);
6569 return_efficiency.full = dfixed_const(8);
6570 return_efficiency.full = dfixed_div(return_efficiency, a);
6571 a.full = dfixed_const(32);
6572 bandwidth.full = dfixed_mul(a, sclk);
6573 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6575 return dfixed_trunc(bandwidth);
6579 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6581 * @wm: watermark calculation data
6583 * Calculate the dmif bandwidth used for display (CIK).
6584 * Used for display watermark bandwidth calculations
6585 * Returns the dmif bandwidth in MBytes/s
6587 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6589 /* Calculate the DMIF Request Bandwidth */
6590 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6591 fixed20_12 disp_clk, bandwidth;
/* bandwidth = disp_clk(MHz) * 32 bytes * 0.8 request efficiency. */
6594 a.full = dfixed_const(1000);
6595 disp_clk.full = dfixed_const(wm->disp_clk);
6596 disp_clk.full = dfixed_div(disp_clk, a);
6597 a.full = dfixed_const(32);
6598 b.full = dfixed_mul(a, disp_clk);
6600 a.full = dfixed_const(10);
6601 disp_clk_request_efficiency.full = dfixed_const(8);
6602 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6604 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6606 return dfixed_trunc(bandwidth);
6610 * dce8_available_bandwidth - get the min available bandwidth
6612 * @wm: watermark calculation data
6614 * Calculate the min available bandwidth used for display (CIK).
6615 * Used for display watermark bandwidth calculations
6616 * Returns the min available bandwidth in MBytes/s
6618 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6620 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
6621 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6622 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6623 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
/* The usable bandwidth is limited by the tightest of the three paths. */
6625 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6629 * dce8_average_bandwidth - get the average available bandwidth
6631 * @wm: watermark calculation data
6633 * Calculate the average available bandwidth used for display (CIK).
6634 * Used for display watermark bandwidth calculations
6635 * Returns the average available bandwidth in MBytes/s
6637 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6639 /* Calculate the display mode Average Bandwidth
6640 * DisplayMode should contain the source and destination dimensions,
6644 fixed20_12 line_time;
6645 fixed20_12 src_width;
6646 fixed20_12 bandwidth;
/* bandwidth = (src_width * bpp * vsc) / line_time(us). */
6649 a.full = dfixed_const(1000);
6650 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6651 line_time.full = dfixed_div(line_time, a);
6652 bpp.full = dfixed_const(wm->bytes_per_pixel);
6653 src_width.full = dfixed_const(wm->src_width);
6654 bandwidth.full = dfixed_mul(src_width, bpp);
6655 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6656 bandwidth.full = dfixed_div(bandwidth, line_time);
6658 return dfixed_trunc(bandwidth);
6662 * dce8_latency_watermark - get the latency watermark
6664 * @wm: watermark calculation data
6666 * Calculate the latency watermark (CIK).
6667 * Used for display watermark bandwidth calculations
6668 * Returns the latency watermark in ns
6670 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6672 /* First calculate the latency in ns */
6673 u32 mc_latency = 2000; /* 2000 ns. */
6674 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* Time for a worst-case 512-byte chunk / 128-byte cursor line pair
 * to return at the available bandwidth (ns).
 */
6675 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6676 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6677 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
/* Other heads' outstanding requests add to the latency we must hide. */
6678 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6679 (wm->num_heads * cursor_line_pair_return_time);
6680 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6681 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6682 u32 tmp, dmif_size = 12288;
6685 if (wm->num_heads == 0)
/* Downscaling/interlacing may need up to 4 source lines per output line. */
6688 a.full = dfixed_const(2);
6689 b.full = dfixed_const(1);
6690 if ((wm->vsc.full > a.full) ||
6691 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6693 ((wm->vsc.full >= a.full) && wm->interlaced))
6694 max_src_lines_per_dst_line = 4;
6696 max_src_lines_per_dst_line = 2;
/* Line-buffer fill bandwidth: limited by this head's share of the
 * available bandwidth, the DMIF buffering, and the display clock.
 */
6698 a.full = dfixed_const(available_bandwidth);
6699 b.full = dfixed_const(wm->num_heads);
6700 a.full = dfixed_div(a, b);
6702 b.full = dfixed_const(mc_latency + 512);
6703 c.full = dfixed_const(wm->disp_clk);
6704 b.full = dfixed_div(b, c);
6706 c.full = dfixed_const(dmif_size);
6707 b.full = dfixed_div(c, b);
6709 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6711 b.full = dfixed_const(1000);
6712 c.full = dfixed_const(wm->disp_clk);
6713 b.full = dfixed_div(c, b);
6714 c.full = dfixed_const(wm->bytes_per_pixel);
6715 b.full = dfixed_mul(b, c);
6717 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* Time (ns) to refill the worst-case number of source lines. */
6719 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6720 b.full = dfixed_const(1000);
6721 c.full = dfixed_const(lb_fill_bw);
6722 b.full = dfixed_div(c, b);
6723 a.full = dfixed_div(a, b);
6724 line_fill_time = dfixed_trunc(a);
/* If the line refill outlasts the active time, pad the watermark. */
6726 if (line_fill_time < wm->active_time)
6729 return latency + (line_fill_time - wm->active_time);
6734 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6735 * average and available dram bandwidth
6737 * @wm: watermark calculation data
6739 * Check if the display average bandwidth fits in the display
6740 * dram bandwidth (CIK).
6741 * Used for display watermark bandwidth calculations
6742 * Returns true if the display fits, false if not.
6744 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
/* Each head gets an equal share of the display DRAM allocation. */
6746 if (dce8_average_bandwidth(wm) <=
6747 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6754 * dce8_average_bandwidth_vs_available_bandwidth - check
6755 * average and available bandwidth
6757 * @wm: watermark calculation data
6759 * Check if the display average bandwidth fits in the display
6760 * available bandwidth (CIK).
6761 * Used for display watermark bandwidth calculations
6762 * Returns true if the display fits, false if not.
6764 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
/* Each head gets an equal share of the min available bandwidth. */
6766 if (dce8_average_bandwidth(wm) <=
6767 (dce8_available_bandwidth(wm) / wm->num_heads))
6774 * dce8_check_latency_hiding - check latency hiding
6776 * @wm: watermark calculation data
6778 * Check latency hiding (CIK).
6779 * Used for display watermark bandwidth calculations
6780 * Returns true if the display fits, false if not.
6782 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6784 u32 lb_partitions = wm->lb_size / wm->src_width;
6785 u32 line_time = wm->active_time + wm->blank_time;
6786 u32 latency_tolerant_lines;
/* How many full lines of buffered data we can tolerate losing:
 * only 1 when downscaling or when the LB barely covers the taps.
 */
6790 a.full = dfixed_const(1);
6791 if (wm->vsc.full > a.full)
6792 latency_tolerant_lines = 1;
6794 if (lb_partitions <= (wm->vtaps + 1))
6795 latency_tolerant_lines = 1;
6797 latency_tolerant_lines = 2;
6800 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
/* Fits if the memory latency is hidden by the buffered line time. */
6802 if (dce8_latency_watermark(wm) <= latency_hiding)
6809 * dce8_program_watermarks - program display watermarks
6811 * @rdev: radeon_device pointer
6812 * @radeon_crtc: the selected display controller
6813 * @lb_size: line buffer size
6814 * @num_heads: number of display controllers in use
6816 * Calculate and program the display watermarks for the
6817 * selected display controller (CIK).
6819 static void dce8_program_watermarks(struct radeon_device *rdev,
6820 struct radeon_crtc *radeon_crtc,
6821 u32 lb_size, u32 num_heads)
6823 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6824 struct dce8_wm_params wm;
6827 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6830 if (radeon_crtc->base.enabled && num_heads && mode) {
/* pixel_period in ns; line_time capped to the 16-bit register field. */
6831 pixel_period = 1000000 / (u32)mode->clock;
6832 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
/* Fill in the watermark parameters from the current mode and clocks. */
6834 wm.yclk = rdev->pm.current_mclk * 10;
6835 wm.sclk = rdev->pm.current_sclk * 10;
6836 wm.disp_clk = mode->clock;
6837 wm.src_width = mode->crtc_hdisplay;
6838 wm.active_time = mode->crtc_hdisplay * pixel_period;
6839 wm.blank_time = line_time - wm.active_time;
6840 wm.interlaced = false;
6841 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6842 wm.interlaced = true;
6843 wm.vsc = radeon_crtc->vsc;
6845 if (radeon_crtc->rmx_type != RMX_OFF)
6847 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6848 wm.lb_size = lb_size;
6849 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6850 wm.num_heads = num_heads;
6852 /* set for high clocks */
6853 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6854 /* set for low clocks */
6855 /* wm.yclk = low clk; wm.sclk = low clk */
/* NOTE(review): wm is not actually updated with low clocks before
 * computing watermark b, so a and b are computed identically here. */
6856 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6858 /* possibly force display priority to high */
6859 /* should really do this at mode validation time... */
6860 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6861 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6862 !dce8_check_latency_hiding(&wm) ||
6863 (rdev->disp_priority == 2)) {
6864 DRM_DEBUG_KMS("force priority to high\n");
/* Program watermark set A (select 1), then set B (select 2), then
 * restore the originally-selected watermark mask.
 */
6869 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6871 tmp &= ~LATENCY_WATERMARK_MASK(3);
6872 tmp |= LATENCY_WATERMARK_MASK(1);
6873 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6874 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6875 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6876 LATENCY_HIGH_WATERMARK(line_time)));
6878 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6879 tmp &= ~LATENCY_WATERMARK_MASK(3);
6880 tmp |= LATENCY_WATERMARK_MASK(2);
6881 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6882 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6883 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6884 LATENCY_HIGH_WATERMARK(line_time)));
6885 /* restore original selection */
6886 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6890 * dce8_bandwidth_update - program display watermarks
6892 * @rdev: radeon_device pointer
6894 * Calculate and program the display watermarks and line
6895 * buffer allocation (CIK).
6897 void dce8_bandwidth_update(struct radeon_device *rdev)
6899 struct drm_display_mode *mode = NULL;
6900 u32 num_heads = 0, lb_size;
6903 radeon_update_display_priority(rdev);
/* First pass: count enabled CRTCs (watermarks depend on head count). */
6905 for (i = 0; i < rdev->num_crtc; i++) {
6906 if (rdev->mode_info.crtcs[i]->base.enabled)
/* Second pass: size the line buffer and program watermarks per CRTC. */
6909 for (i = 0; i < rdev->num_crtc; i++) {
6910 mode = &rdev->mode_info.crtcs[i]->base.mode;
6911 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6912 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6917 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6919 * @rdev: radeon_device pointer
6921 * Fetches a GPU clock counter snapshot (CIK).
6922 * Returns the 64 bit clock counter snapshot.
6924 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
/* Serialize: writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the 64-bit
 * counter into the LSB/MSB registers, which must be read as a pair.
 */
6928 spin_lock(&rdev->gpu_clock_mutex);
6929 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6930 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6931 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6932 spin_unlock(&rdev->gpu_clock_mutex);
6936 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6937 u32 cntl_reg, u32 status_reg)
6940 struct atom_clock_dividers dividers;
6943 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6944 clock, false, ÷rs);
6948 tmp = RREG32_SMC(cntl_reg);
6949 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6950 tmp |= dividers.post_divider;
6951 WREG32_SMC(cntl_reg, tmp);
6953 for (i = 0; i < 100; i++) {
6954 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6964 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
/* Program the UVD video clock (VCLK), then the display clock (DCLK). */
6968 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6972 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6976 int cik_uvd_resume(struct radeon_device *rdev)
6982 r = radeon_uvd_resume(rdev);
6986 /* programm the VCPU memory controller bits 0-27 */
6987 addr = rdev->uvd.gpu_addr >> 3;
6988 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3;
6989 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6990 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6993 size = RADEON_UVD_STACK_SIZE >> 3;
6994 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6995 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6998 size = RADEON_UVD_HEAP_SIZE >> 3;
6999 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7000 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7003 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7004 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7007 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7008 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));