2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/module.h>
28 #include "radeon_asic.h"
31 #include "cik_blit_shaders.h"
/* Microcode image sizes, in 32-bit dwords, for each CIK engine (PFP/ME/CE
 * command processors, MEC compute engine, RLC, MC and SDMA), plus the SDMA
 * ucode version.  These are checked against the loaded firmware blob sizes
 * in cik_init_microcode() below (each is multiplied by 4 to get bytes). */
34 #define CIK_PFP_UCODE_SIZE 2144
35 #define CIK_ME_UCODE_SIZE 2144
36 #define CIK_CE_UCODE_SIZE 2144
38 #define CIK_MEC_UCODE_SIZE 4192
40 #define BONAIRE_RLC_UCODE_SIZE 2048
41 #define KB_RLC_UCODE_SIZE 2560
42 #define KV_RLC_UCODE_SIZE 2560
44 #define CIK_MC_UCODE_SIZE 7866
46 #define CIK_SDMA_UCODE_SIZE 1050
47 #define CIK_SDMA_UCODE_VERSION 64
/* Firmware images required by the three CIK families handled in this file
 * (BONAIRE dGPU, KAVERI and KABINI APUs).  Note BONAIRE is the only one
 * with an MC image; APUs have no discrete memory controller ucode. */
49 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
62 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
63 MODULE_FIRMWARE("radeon/KABINI_me.bin");
64 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
65 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
66 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
67 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
/* Forward declaration: RLC halt helper, defined later in this file. */
69 static void cik_rlc_stop(struct radeon_device *rdev);
72 * Indirect registers accessor
/*
 * cik_pciep_rreg - read a PCIE indirect register.
 * Writes the register offset to PCIE_INDEX, reads PCIE_INDEX back to post
 * the write, then reads the value from PCIE_DATA.
 * NOTE(review): source view is decimated here; the local declaration of
 * 'r', any locking, and the return statement are not visible - confirm
 * against the full file.
 */
74 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
78 	WREG32(PCIE_INDEX, reg);
79 	(void)RREG32(PCIE_INDEX);
80 	r = RREG32(PCIE_DATA);
/*
 * cik_pciep_wreg - write a PCIE indirect register.
 * Selects the register via PCIE_INDEX (posted by the read-back), then the
 * data write itself is followed by a PCIE_DATA read-back to flush it.
 * NOTE(review): the WREG32(PCIE_DATA, v) line and any locking are missing
 * from this decimated view - confirm against the full file.
 */
86 	WREG32(PCIE_INDEX, reg);
87 	(void)RREG32(PCIE_INDEX);
89 	(void)RREG32(PCIE_DATA);
/* Bonaire SPM golden settings: {register offset, AND mask, OR value}. */
92 static const u32 bonaire_golden_spm_registers[] =
94 	0x30800, 0xe0ffffff, 0xe0000000
/* Bonaire common golden settings: {register offset, AND mask, OR value}. */
97 static const u32 bonaire_golden_common_registers[] =
99 	0xc770, 0xffffffff, 0x00000800,
100 	0xc774, 0xffffffff, 0x00000800,
101 	0xc798, 0xffffffff, 0x00007fbf,
102 	0xc79c, 0xffffffff, 0x00007faf
/* Bonaire per-ASIC golden register fixups: {register offset, AND mask,
 * OR value} triplets applied at init by cik_init_golden_registers(). */
105 static const u32 bonaire_golden_registers[] =
107 	0x3354, 0x00000333, 0x00000333,
108 	0x3350, 0x000c0fc0, 0x00040200,
109 	0x9a10, 0x00010000, 0x00058208,
110 	0x3c000, 0xffff1fff, 0x00140000,
111 	0x3c200, 0xfdfc0fff, 0x00000100,
112 	0x3c234, 0x40000000, 0x40000200,
113 	0x9830, 0xffffffff, 0x00000000,
114 	0x9834, 0xf00fffff, 0x00000400,
115 	0x9838, 0x0002021c, 0x00020200,
116 	0xc78, 0x00000080, 0x00000000,
117 	0x5bb0, 0x000000f0, 0x00000070,
118 	0x5bc0, 0xf0311fff, 0x80300000,
119 	0x98f8, 0x73773777, 0x12010001,
120 	0x350c, 0x00810000, 0x408af000,
121 	0x7030, 0x31000111, 0x00000011,
122 	0x2f48, 0x73773777, 0x12010001,
123 	0x220c, 0x00007fb6, 0x0021a1b1,
124 	0x2210, 0x00007fb6, 0x002021b1,
125 	0x2180, 0x00007fb6, 0x00002191,
126 	0x2218, 0x00007fb6, 0x002121b1,
127 	0x221c, 0x00007fb6, 0x002021b1,
128 	0x21dc, 0x00007fb6, 0x00002191,
129 	0x21e0, 0x00007fb6, 0x00002191,
130 	0x3628, 0x0000003f, 0x0000000a,
131 	0x362c, 0x0000003f, 0x0000000a,
132 	0x2ae4, 0x00073ffe, 0x000022a2,
133 	0x240c, 0x000007ff, 0x00000000,
134 	0x8a14, 0xf000003f, 0x00000007,
135 	0x8bf0, 0x00002001, 0x00000001,
136 	0x8b24, 0xffffffff, 0x00ffffff,
137 	0x30a04, 0x0000ff0f, 0x00000000,
138 	0x28a4c, 0x07ffffff, 0x06000000,
139 	0x4d8, 0x00000fff, 0x00000100,
140 	0x3e78, 0x00000001, 0x00000002,
141 	0x9100, 0x03000000, 0x0362c688,
142 	0x8c00, 0x000000ff, 0x00000001,
143 	0xe40, 0x00001fff, 0x00001fff,
144 	0x9060, 0x0000007f, 0x00000020,
145 	0x9508, 0x00010000, 0x00010000,
146 	0xac14, 0x000003ff, 0x000000f3,
147 	0xac0c, 0xffffffff, 0x00001032
/* Bonaire medium-grain / coarse-grain clock-gating init sequence:
 * {register offset, AND mask, OR value} triplets. */
150 static const u32 bonaire_mgcg_cgcg_init[] =
152 	0xc420, 0xffffffff, 0xfffffffc,
153 	0x30800, 0xffffffff, 0xe0000000,
154 	0x3c2a0, 0xffffffff, 0x00000100,
155 	0x3c208, 0xffffffff, 0x00000100,
156 	0x3c2c0, 0xffffffff, 0xc0000100,
157 	0x3c2c8, 0xffffffff, 0xc0000100,
158 	0x3c2c4, 0xffffffff, 0xc0000100,
159 	0x55e4, 0xffffffff, 0x00600100,
160 	0x3c280, 0xffffffff, 0x00000100,
161 	0x3c214, 0xffffffff, 0x06000100,
162 	0x3c220, 0xffffffff, 0x00000100,
163 	0x3c218, 0xffffffff, 0x06000100,
164 	0x3c204, 0xffffffff, 0x00000100,
165 	0x3c2e0, 0xffffffff, 0x00000100,
166 	0x3c224, 0xffffffff, 0x00000100,
167 	0x3c200, 0xffffffff, 0x00000100,
168 	0x3c230, 0xffffffff, 0x00000100,
169 	0x3c234, 0xffffffff, 0x00000100,
170 	0x3c250, 0xffffffff, 0x00000100,
171 	0x3c254, 0xffffffff, 0x00000100,
172 	0x3c258, 0xffffffff, 0x00000100,
173 	0x3c25c, 0xffffffff, 0x00000100,
174 	0x3c260, 0xffffffff, 0x00000100,
175 	0x3c27c, 0xffffffff, 0x00000100,
176 	0x3c278, 0xffffffff, 0x00000100,
177 	0x3c210, 0xffffffff, 0x06000100,
178 	0x3c290, 0xffffffff, 0x00000100,
179 	0x3c274, 0xffffffff, 0x00000100,
180 	0x3c2b4, 0xffffffff, 0x00000100,
181 	0x3c2b0, 0xffffffff, 0x00000100,
182 	0x3c270, 0xffffffff, 0x00000100,
183 	0x30800, 0xffffffff, 0xe0000000,
184 	0x3c020, 0xffffffff, 0x00010000,
185 	0x3c024, 0xffffffff, 0x00030002,
186 	0x3c028, 0xffffffff, 0x00040007,
187 	0x3c02c, 0xffffffff, 0x00060005,
188 	0x3c030, 0xffffffff, 0x00090008,
189 	0x3c034, 0xffffffff, 0x00010000,
190 	0x3c038, 0xffffffff, 0x00030002,
191 	0x3c03c, 0xffffffff, 0x00040007,
192 	0x3c040, 0xffffffff, 0x00060005,
193 	0x3c044, 0xffffffff, 0x00090008,
194 	0x3c048, 0xffffffff, 0x00010000,
195 	0x3c04c, 0xffffffff, 0x00030002,
196 	0x3c050, 0xffffffff, 0x00040007,
197 	0x3c054, 0xffffffff, 0x00060005,
198 	0x3c058, 0xffffffff, 0x00090008,
199 	0x3c05c, 0xffffffff, 0x00010000,
200 	0x3c060, 0xffffffff, 0x00030002,
201 	0x3c064, 0xffffffff, 0x00040007,
202 	0x3c068, 0xffffffff, 0x00060005,
203 	0x3c06c, 0xffffffff, 0x00090008,
204 	0x3c070, 0xffffffff, 0x00010000,
205 	0x3c074, 0xffffffff, 0x00030002,
206 	0x3c078, 0xffffffff, 0x00040007,
207 	0x3c07c, 0xffffffff, 0x00060005,
208 	0x3c080, 0xffffffff, 0x00090008,
209 	0x3c084, 0xffffffff, 0x00010000,
210 	0x3c088, 0xffffffff, 0x00030002,
211 	0x3c08c, 0xffffffff, 0x00040007,
212 	0x3c090, 0xffffffff, 0x00060005,
213 	0x3c094, 0xffffffff, 0x00090008,
214 	0x3c098, 0xffffffff, 0x00010000,
215 	0x3c09c, 0xffffffff, 0x00030002,
216 	0x3c0a0, 0xffffffff, 0x00040007,
217 	0x3c0a4, 0xffffffff, 0x00060005,
218 	0x3c0a8, 0xffffffff, 0x00090008,
219 	0x3c000, 0xffffffff, 0x96e00200,
220 	0x8708, 0xffffffff, 0x00900100,
221 	0xc424, 0xffffffff, 0x0020003f,
222 	0x38, 0xffffffff, 0x0140001c,
223 	0x3c, 0x000f0000, 0x000f0000,
224 	0x220, 0xffffffff, 0xC060000C,
225 	0x224, 0xc0000fff, 0x00000100,
226 	0xf90, 0xffffffff, 0x00000100,
227 	0xf98, 0x00000101, 0x00000000,
228 	0x20a8, 0xffffffff, 0x00000104,
229 	0x55e4, 0xff000fff, 0x00000100,
230 	0x30cc, 0xc0000fff, 0x00000104,
231 	0xc1e4, 0x00000001, 0x00000001,
232 	0xd00c, 0xff000ff0, 0x00000100,
233 	0xd80c, 0xff000ff0, 0x00000100
/* Spectre (Kaveri GFX) SPM golden settings: {reg, AND mask, OR value}. */
236 static const u32 spectre_golden_spm_registers[] =
238 	0x30800, 0xe0ffffff, 0xe0000000
/* Spectre common golden settings: {reg, AND mask, OR value}. */
241 static const u32 spectre_golden_common_registers[] =
243 	0xc770, 0xffffffff, 0x00000800,
244 	0xc774, 0xffffffff, 0x00000800,
245 	0xc798, 0xffffffff, 0x00007fbf,
246 	0xc79c, 0xffffffff, 0x00007faf
/* Spectre per-ASIC golden register fixups: {reg, AND mask, OR value}. */
249 static const u32 spectre_golden_registers[] =
251 	0x3c000, 0xffff1fff, 0x96940200,
252 	0x3c00c, 0xffff0001, 0xff000000,
253 	0x3c200, 0xfffc0fff, 0x00000100,
254 	0x6ed8, 0x00010101, 0x00010000,
255 	0x9834, 0xf00fffff, 0x00000400,
256 	0x9838, 0xfffffffc, 0x00020200,
257 	0x5bb0, 0x000000f0, 0x00000070,
258 	0x5bc0, 0xf0311fff, 0x80300000,
259 	0x98f8, 0x73773777, 0x12010001,
260 	0x9b7c, 0x00ff0000, 0x00fc0000,
261 	0x2f48, 0x73773777, 0x12010001,
262 	0x8a14, 0xf000003f, 0x00000007,
263 	0x8b24, 0xffffffff, 0x00ffffff,
264 	0x28350, 0x3f3f3fff, 0x00000082,
265 	0x28355, 0x0000003f, 0x00000000,
266 	0x3e78, 0x00000001, 0x00000002,
267 	0x913c, 0xffff03df, 0x00000004,
268 	0xc768, 0x00000008, 0x00000008,
269 	0x8c00, 0x000008ff, 0x00000800,
270 	0x9508, 0x00010000, 0x00010000,
271 	0xac0c, 0xffffffff, 0x54763210,
272 	0x214f8, 0x01ff01ff, 0x00000002,
273 	0x21498, 0x007ff800, 0x00200000,
274 	0x2015c, 0xffffffff, 0x00000f40,
275 	0x30934, 0xffffffff, 0x00000001
/* Spectre clock-gating init sequence: {reg, AND mask, OR value} triplets. */
278 static const u32 spectre_mgcg_cgcg_init[] =
280 	0xc420, 0xffffffff, 0xfffffffc,
281 	0x30800, 0xffffffff, 0xe0000000,
282 	0x3c2a0, 0xffffffff, 0x00000100,
283 	0x3c208, 0xffffffff, 0x00000100,
284 	0x3c2c0, 0xffffffff, 0x00000100,
285 	0x3c2c8, 0xffffffff, 0x00000100,
286 	0x3c2c4, 0xffffffff, 0x00000100,
287 	0x55e4, 0xffffffff, 0x00600100,
288 	0x3c280, 0xffffffff, 0x00000100,
289 	0x3c214, 0xffffffff, 0x06000100,
290 	0x3c220, 0xffffffff, 0x00000100,
291 	0x3c218, 0xffffffff, 0x06000100,
292 	0x3c204, 0xffffffff, 0x00000100,
293 	0x3c2e0, 0xffffffff, 0x00000100,
294 	0x3c224, 0xffffffff, 0x00000100,
295 	0x3c200, 0xffffffff, 0x00000100,
296 	0x3c230, 0xffffffff, 0x00000100,
297 	0x3c234, 0xffffffff, 0x00000100,
298 	0x3c250, 0xffffffff, 0x00000100,
299 	0x3c254, 0xffffffff, 0x00000100,
300 	0x3c258, 0xffffffff, 0x00000100,
301 	0x3c25c, 0xffffffff, 0x00000100,
302 	0x3c260, 0xffffffff, 0x00000100,
303 	0x3c27c, 0xffffffff, 0x00000100,
304 	0x3c278, 0xffffffff, 0x00000100,
305 	0x3c210, 0xffffffff, 0x06000100,
306 	0x3c290, 0xffffffff, 0x00000100,
307 	0x3c274, 0xffffffff, 0x00000100,
308 	0x3c2b4, 0xffffffff, 0x00000100,
309 	0x3c2b0, 0xffffffff, 0x00000100,
310 	0x3c270, 0xffffffff, 0x00000100,
311 	0x30800, 0xffffffff, 0xe0000000,
312 	0x3c020, 0xffffffff, 0x00010000,
313 	0x3c024, 0xffffffff, 0x00030002,
314 	0x3c028, 0xffffffff, 0x00040007,
315 	0x3c02c, 0xffffffff, 0x00060005,
316 	0x3c030, 0xffffffff, 0x00090008,
317 	0x3c034, 0xffffffff, 0x00010000,
318 	0x3c038, 0xffffffff, 0x00030002,
319 	0x3c03c, 0xffffffff, 0x00040007,
320 	0x3c040, 0xffffffff, 0x00060005,
321 	0x3c044, 0xffffffff, 0x00090008,
322 	0x3c048, 0xffffffff, 0x00010000,
323 	0x3c04c, 0xffffffff, 0x00030002,
324 	0x3c050, 0xffffffff, 0x00040007,
325 	0x3c054, 0xffffffff, 0x00060005,
326 	0x3c058, 0xffffffff, 0x00090008,
327 	0x3c05c, 0xffffffff, 0x00010000,
328 	0x3c060, 0xffffffff, 0x00030002,
329 	0x3c064, 0xffffffff, 0x00040007,
330 	0x3c068, 0xffffffff, 0x00060005,
331 	0x3c06c, 0xffffffff, 0x00090008,
332 	0x3c070, 0xffffffff, 0x00010000,
333 	0x3c074, 0xffffffff, 0x00030002,
334 	0x3c078, 0xffffffff, 0x00040007,
335 	0x3c07c, 0xffffffff, 0x00060005,
336 	0x3c080, 0xffffffff, 0x00090008,
337 	0x3c084, 0xffffffff, 0x00010000,
338 	0x3c088, 0xffffffff, 0x00030002,
339 	0x3c08c, 0xffffffff, 0x00040007,
340 	0x3c090, 0xffffffff, 0x00060005,
341 	0x3c094, 0xffffffff, 0x00090008,
342 	0x3c098, 0xffffffff, 0x00010000,
343 	0x3c09c, 0xffffffff, 0x00030002,
344 	0x3c0a0, 0xffffffff, 0x00040007,
345 	0x3c0a4, 0xffffffff, 0x00060005,
346 	0x3c0a8, 0xffffffff, 0x00090008,
347 	0x3c0ac, 0xffffffff, 0x00010000,
348 	0x3c0b0, 0xffffffff, 0x00030002,
349 	0x3c0b4, 0xffffffff, 0x00040007,
350 	0x3c0b8, 0xffffffff, 0x00060005,
351 	0x3c0bc, 0xffffffff, 0x00090008,
352 	0x3c000, 0xffffffff, 0x96e00200,
353 	0x8708, 0xffffffff, 0x00900100,
354 	0xc424, 0xffffffff, 0x0020003f,
355 	0x38, 0xffffffff, 0x0140001c,
356 	0x3c, 0x000f0000, 0x000f0000,
357 	0x220, 0xffffffff, 0xC060000C,
358 	0x224, 0xc0000fff, 0x00000100,
359 	0xf90, 0xffffffff, 0x00000100,
360 	0xf98, 0x00000101, 0x00000000,
361 	0x20a8, 0xffffffff, 0x00000104,
362 	0x55e4, 0xff000fff, 0x00000100,
363 	0x30cc, 0xc0000fff, 0x00000104,
364 	0xc1e4, 0x00000001, 0x00000001,
365 	0xd00c, 0xff000ff0, 0x00000100,
366 	0xd80c, 0xff000ff0, 0x00000100
/* Kalindi (Kabini GFX) SPM golden settings: {reg, AND mask, OR value}. */
369 static const u32 kalindi_golden_spm_registers[] =
371 	0x30800, 0xe0ffffff, 0xe0000000
/* Kalindi common golden settings: {reg, AND mask, OR value}. */
374 static const u32 kalindi_golden_common_registers[] =
376 	0xc770, 0xffffffff, 0x00000800,
377 	0xc774, 0xffffffff, 0x00000800,
378 	0xc798, 0xffffffff, 0x00007fbf,
379 	0xc79c, 0xffffffff, 0x00007faf
/* Kalindi per-ASIC golden register fixups: {reg, AND mask, OR value}. */
382 static const u32 kalindi_golden_registers[] =
384 	0x3c000, 0xffffdfff, 0x6e944040,
385 	0x55e4, 0xff607fff, 0xfc000100,
386 	0x3c220, 0xff000fff, 0x00000100,
387 	0x3c224, 0xff000fff, 0x00000100,
388 	0x3c200, 0xfffc0fff, 0x00000100,
389 	0x6ed8, 0x00010101, 0x00010000,
390 	0x9830, 0xffffffff, 0x00000000,
391 	0x9834, 0xf00fffff, 0x00000400,
392 	0x5bb0, 0x000000f0, 0x00000070,
393 	0x5bc0, 0xf0311fff, 0x80300000,
394 	0x98f8, 0x73773777, 0x12010001,
395 	0x98fc, 0xffffffff, 0x00000010,
396 	0x9b7c, 0x00ff0000, 0x00fc0000,
397 	0x8030, 0x00001f0f, 0x0000100a,
398 	0x2f48, 0x73773777, 0x12010001,
399 	0x2408, 0x000fffff, 0x000c007f,
400 	0x8a14, 0xf000003f, 0x00000007,
401 	0x8b24, 0x3fff3fff, 0x00ffcfff,
402 	0x30a04, 0x0000ff0f, 0x00000000,
403 	0x28a4c, 0x07ffffff, 0x06000000,
404 	0x4d8, 0x00000fff, 0x00000100,
405 	0x3e78, 0x00000001, 0x00000002,
406 	0xc768, 0x00000008, 0x00000008,
407 	0x8c00, 0x000000ff, 0x00000003,
408 	0x214f8, 0x01ff01ff, 0x00000002,
409 	0x21498, 0x007ff800, 0x00200000,
410 	0x2015c, 0xffffffff, 0x00000f40,
411 	0x88c4, 0x001f3ae3, 0x00000082,
412 	0x88d4, 0x0000001f, 0x00000010,
413 	0x30934, 0xffffffff, 0x00000000
/* Kalindi clock-gating init sequence: {reg, AND mask, OR value} triplets. */
416 static const u32 kalindi_mgcg_cgcg_init[] =
418 	0xc420, 0xffffffff, 0xfffffffc,
419 	0x30800, 0xffffffff, 0xe0000000,
420 	0x3c2a0, 0xffffffff, 0x00000100,
421 	0x3c208, 0xffffffff, 0x00000100,
422 	0x3c2c0, 0xffffffff, 0x00000100,
423 	0x3c2c8, 0xffffffff, 0x00000100,
424 	0x3c2c4, 0xffffffff, 0x00000100,
425 	0x55e4, 0xffffffff, 0x00600100,
426 	0x3c280, 0xffffffff, 0x00000100,
427 	0x3c214, 0xffffffff, 0x06000100,
428 	0x3c220, 0xffffffff, 0x00000100,
429 	0x3c218, 0xffffffff, 0x06000100,
430 	0x3c204, 0xffffffff, 0x00000100,
431 	0x3c2e0, 0xffffffff, 0x00000100,
432 	0x3c224, 0xffffffff, 0x00000100,
433 	0x3c200, 0xffffffff, 0x00000100,
434 	0x3c230, 0xffffffff, 0x00000100,
435 	0x3c234, 0xffffffff, 0x00000100,
436 	0x3c250, 0xffffffff, 0x00000100,
437 	0x3c254, 0xffffffff, 0x00000100,
438 	0x3c258, 0xffffffff, 0x00000100,
439 	0x3c25c, 0xffffffff, 0x00000100,
440 	0x3c260, 0xffffffff, 0x00000100,
441 	0x3c27c, 0xffffffff, 0x00000100,
442 	0x3c278, 0xffffffff, 0x00000100,
443 	0x3c210, 0xffffffff, 0x06000100,
444 	0x3c290, 0xffffffff, 0x00000100,
445 	0x3c274, 0xffffffff, 0x00000100,
446 	0x3c2b4, 0xffffffff, 0x00000100,
447 	0x3c2b0, 0xffffffff, 0x00000100,
448 	0x3c270, 0xffffffff, 0x00000100,
449 	0x30800, 0xffffffff, 0xe0000000,
450 	0x3c020, 0xffffffff, 0x00010000,
451 	0x3c024, 0xffffffff, 0x00030002,
452 	0x3c028, 0xffffffff, 0x00040007,
453 	0x3c02c, 0xffffffff, 0x00060005,
454 	0x3c030, 0xffffffff, 0x00090008,
455 	0x3c034, 0xffffffff, 0x00010000,
456 	0x3c038, 0xffffffff, 0x00030002,
457 	0x3c03c, 0xffffffff, 0x00040007,
458 	0x3c040, 0xffffffff, 0x00060005,
459 	0x3c044, 0xffffffff, 0x00090008,
460 	0x3c000, 0xffffffff, 0x96e00200,
461 	0x8708, 0xffffffff, 0x00900100,
462 	0xc424, 0xffffffff, 0x0020003f,
463 	0x38, 0xffffffff, 0x0140001c,
464 	0x3c, 0x000f0000, 0x000f0000,
465 	0x220, 0xffffffff, 0xC060000C,
466 	0x224, 0xc0000fff, 0x00000100,
467 	0x20a8, 0xffffffff, 0x00000104,
468 	0x55e4, 0xff000fff, 0x00000100,
469 	0x30cc, 0xc0000fff, 0x00000104,
470 	0xc1e4, 0x00000001, 0x00000001,
471 	0xd00c, 0xff000ff0, 0x00000100,
472 	0xd80c, 0xff000ff0, 0x00000100
/*
 * cik_init_golden_registers - program the per-family "golden" register
 * settings (clock gating init, ASIC fixups, common and SPM tables above)
 * via radeon_program_register_sequence(), switching on rdev->family.
 * NOTE(review): this view is decimated - the case labels (presumably
 * CHIP_BONAIRE / CHIP_KABINI / CHIP_KAVERI), breaks, and any locking are
 * not visible; confirm against the full file.
 */
475 static void cik_init_golden_registers(struct radeon_device *rdev)
477 	switch (rdev->family) {
479 		radeon_program_register_sequence(rdev,
480 						 bonaire_mgcg_cgcg_init,
481 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
482 		radeon_program_register_sequence(rdev,
483 						 bonaire_golden_registers,
484 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
485 		radeon_program_register_sequence(rdev,
486 						 bonaire_golden_common_registers,
487 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
488 		radeon_program_register_sequence(rdev,
489 						 bonaire_golden_spm_registers,
490 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
493 		radeon_program_register_sequence(rdev,
494 						 kalindi_mgcg_cgcg_init,
495 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
496 		radeon_program_register_sequence(rdev,
497 						 kalindi_golden_registers,
498 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
499 		radeon_program_register_sequence(rdev,
500 						 kalindi_golden_common_registers,
501 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
502 		radeon_program_register_sequence(rdev,
503 						 kalindi_golden_spm_registers,
504 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
507 		radeon_program_register_sequence(rdev,
508 						 spectre_mgcg_cgcg_init,
509 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
510 		radeon_program_register_sequence(rdev,
511 						 spectre_golden_registers,
512 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
513 		radeon_program_register_sequence(rdev,
514 						 spectre_golden_common_registers,
515 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
516 		radeon_program_register_sequence(rdev,
517 						 spectre_golden_spm_registers,
518 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
526 * cik_get_xclk - get the xclk
528 * @rdev: radeon_device pointer
530 * Returns the reference clock used by the gfx engine
/* On IGPs the GPU counter clock bit halves the reference clock; on dGPUs
 * the XTALIN divider quarters it; otherwise the raw SPLL reference
 * frequency is returned. */
533 u32 cik_get_xclk(struct radeon_device *rdev)
535 	u32 reference_clock = rdev->clock.spll.reference_freq;
537 	if (rdev->flags & RADEON_IS_IGP) {
538 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
539 			return reference_clock / 2;
541 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
542 			return reference_clock / 4;
544 	return reference_clock;
548 * cik_mm_rdoorbell - read a doorbell dword
550 * @rdev: radeon_device pointer
551 * @offset: byte offset into the aperture
553 * Returns the value in the doorbell aperture at the
554 * requested offset (CIK).
/* Bounds-checks @offset against the mapped doorbell aperture before the
 * MMIO read; out-of-range reads log an error (the fallback return value
 * is not visible in this decimated view). */
556 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
558 	if (offset < rdev->doorbell.size) {
559 		return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
561 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
567 * cik_mm_wdoorbell - write a doorbell dword
569 * @rdev: radeon_device pointer
570 * @offset: byte offset into the aperture
573 * Writes @v to the doorbell aperture at the
574 * requested offset (CIK).
/* Mirror of cik_mm_rdoorbell(): bounds-checked MMIO write; out-of-range
 * writes are dropped with an error message. */
576 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
578 	if (offset < rdev->doorbell.size) {
579 		writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
581 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
/* Number of {index, data} pairs in the Bonaire MC IO register table. */
585 #define BONAIRE_IO_MC_REGS_SIZE 36
/* Bonaire MC IO debug register programming: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written before loading the MC ucode in
 * ci_mc_load_microcode(). */
587 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
589 	{0x00000070, 0x04400000},
590 	{0x00000071, 0x80c01803},
591 	{0x00000072, 0x00004004},
592 	{0x00000073, 0x00000100},
593 	{0x00000074, 0x00ff0000},
594 	{0x00000075, 0x34000000},
595 	{0x00000076, 0x08000014},
596 	{0x00000077, 0x00cc08ec},
597 	{0x00000078, 0x00000400},
598 	{0x00000079, 0x00000000},
599 	{0x0000007a, 0x04090000},
600 	{0x0000007c, 0x00000000},
601 	{0x0000007e, 0x4408a8e8},
602 	{0x0000007f, 0x00000304},
603 	{0x00000080, 0x00000000},
604 	{0x00000082, 0x00000001},
605 	{0x00000083, 0x00000002},
606 	{0x00000084, 0xf3e4f400},
607 	{0x00000085, 0x052024e3},
608 	{0x00000087, 0x00000000},
609 	{0x00000088, 0x01000000},
610 	{0x0000008a, 0x1c0a0000},
611 	{0x0000008b, 0xff010000},
612 	{0x0000008d, 0xffffefff},
613 	{0x0000008e, 0xfff3efff},
614 	{0x0000008f, 0xfff3efbf},
615 	{0x00000092, 0xf7ffffff},
616 	{0x00000093, 0xffffff7f},
617 	{0x00000095, 0x00101101},
618 	{0x00000096, 0x00000fff},
619 	{0x00000097, 0x00116fff},
620 	{0x00000098, 0x60010000},
621 	{0x00000099, 0x10010000},
622 	{0x0000009a, 0x00006000},
623 	{0x0000009b, 0x00001000},
624 	{0x0000009f, 0x00b48000}
628 * cik_srbm_select - select specific register instances
630 * @rdev: radeon_device pointer
631 * @me: selected ME (micro engine)
636 * Switches the currently active registers instances. Some
637 * registers are instanced per VMID, others are instanced per
638 * me/pipe/queue combination.
/* Packs pipe (2 bits) and queue (3 bits) selectors into SRBM_GFX_CNTL.
 * NOTE(review): the MEID()/VMID() terms of the expression are missing
 * from this decimated view - confirm against the full file. */
640 static void cik_srbm_select(struct radeon_device *rdev,
641 			    u32 me, u32 pipe, u32 queue, u32 vmid)
643 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
646 			     QUEUEID(queue & 0x7));
647 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
652 * ci_mc_load_microcode - load MC ucode into the hw
654 * @rdev: radeon_device pointer
656 * Load the GDDR MC ucode into the hw (CIK).
657 * Returns 0 on success, error on failure.
/* Sequence: skip if the MC is already running (RUN_MASK), optionally
 * blackout memory clients, reset the MC sequencer into writable mode,
 * program the IO debug table, stream the big-endian ucode words into
 * MC_SEQ_SUP_PGM, restart the engine, then poll for D0/D1 training
 * completion and restore the blackout register.
 * NOTE(review): decimated view - the early-out on 'running', the ucode
 * NULL check, case labels, loop bodies and return paths are missing. */
659 static __unused int ci_mc_load_microcode(struct radeon_device *rdev)
661 	const __be32 *fw_data;
662 	u32 running, blackout = 0;
664 	int i, ucode_size, regs_size;
669 	switch (rdev->family) {
672 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
673 		ucode_size = CIK_MC_UCODE_SIZE;
674 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
678 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
682 		blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
683 		WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
686 		/* reset the engine and set to writable */
687 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
688 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
690 		/* load mc io regs */
691 		for (i = 0; i < regs_size; i++) {
692 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
693 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
695 		/* load the MC ucode */
696 		fw_data = (const __be32 *)rdev->mc_fw->data;
697 		for (i = 0; i < ucode_size; i++)
698 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
700 		/* put the engine back into the active state */
701 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
702 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
703 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
705 		/* wait for training to complete */
706 		for (i = 0; i < rdev->usec_timeout; i++) {
707 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
711 		for (i = 0; i < rdev->usec_timeout; i++) {
712 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
718 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
725 * cik_init_microcode - load ucode images from disk
727 * @rdev: radeon_device pointer
729 * Use the firmware interface to load the ucode images into
730 * the driver (not loaded into hw).
731 * Returns 0 on success, error on failure.
/* Per-family expected sizes are chosen from the CIK_*_UCODE_SIZE
 * constants, then each image (pfp/me/ce/mec/rlc/sdma, plus mc on dGPUs
 * only) is fetched via request_firmware() and size-validated; on any
 * failure every previously loaded image is released.  Uses DragonFly
 * ksnprintf() and "radeonkmsfw_%s_<img>" firmware names.
 * NOTE(review): decimated view - 'err', 'fw_name', case labels, the
 * 'goto out' error paths and the final returns are not visible. */
733 static int cik_init_microcode(struct radeon_device *rdev)
735 	const char *chip_name;
736 	size_t pfp_req_size, me_req_size, ce_req_size,
737 		mec_req_size, rlc_req_size, mc_req_size,
744 	switch (rdev->family) {
746 		chip_name = "BONAIRE";
747 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
748 		me_req_size = CIK_ME_UCODE_SIZE * 4;
749 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
750 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
751 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
752 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
753 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
756 		chip_name = "KAVERI";
757 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
758 		me_req_size = CIK_ME_UCODE_SIZE * 4;
759 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
760 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
761 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
762 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
765 		chip_name = "KABINI";
766 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767 		me_req_size = CIK_ME_UCODE_SIZE * 4;
768 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
769 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
771 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
776 	DRM_INFO("Loading %s Microcode\n", chip_name);
778 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
779 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
782 	if (rdev->pfp_fw->datasize != pfp_req_size) {
784 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
785 		       rdev->pfp_fw->datasize, fw_name);
790 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
791 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
794 	if (rdev->me_fw->datasize != me_req_size) {
796 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
797 		       rdev->me_fw->datasize, fw_name);
801 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
802 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
805 	if (rdev->ce_fw->datasize != ce_req_size) {
807 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
808 		       rdev->ce_fw->datasize, fw_name);
812 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
813 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
816 	if (rdev->mec_fw->datasize != mec_req_size) {
818 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
819 		       rdev->mec_fw->datasize, fw_name);
823 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
824 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
827 	if (rdev->rlc_fw->datasize != rlc_req_size) {
829 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
830 		       rdev->rlc_fw->datasize, fw_name);
834 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
835 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
838 	if (rdev->sdma_fw->datasize != sdma_req_size) {
840 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
841 		       rdev->sdma_fw->datasize, fw_name);
845 	/* No MC ucode on APUs */
846 	if (!(rdev->flags & RADEON_IS_IGP)) {
847 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
848 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
851 		if (rdev->mc_fw->datasize != mc_req_size) {
853 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
854 			       rdev->mc_fw->datasize, fw_name);
863 		       "cik_cp: Failed to load firmware \"%s\"\n",
865 		release_firmware(rdev->pfp_fw);
867 		release_firmware(rdev->me_fw);
869 		release_firmware(rdev->ce_fw);
871 		release_firmware(rdev->mec_fw);
873 		release_firmware(rdev->rlc_fw);
875 		release_firmware(rdev->sdma_fw);
876 		rdev->sdma_fw = NULL;
877 		release_firmware(rdev->mc_fw);
887 * cik_tiling_mode_table_init - init the hw tiling table
889 * @rdev: radeon_device pointer
891 * Starting with SI, the tiling setup is done globally in a
892 * set of 32 tiling modes. Rather than selecting each set of
893 * parameters per surface as on older asics, we just select
894 * which index in the tiling table we want to use, and the
895 * surface uses those parameters (CIK).
897 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
899 const u32 num_tile_mode_states = 32;
900 const u32 num_secondary_tile_mode_states = 16;
901 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
902 u32 num_pipe_configs;
903 u32 num_rbs = rdev->config.cik.max_backends_per_se *
904 rdev->config.cik.max_shader_engines;
906 switch (rdev->config.cik.mem_row_size_in_kb) {
908 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
912 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
915 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
919 num_pipe_configs = rdev->config.cik.max_tile_pipes;
920 if (num_pipe_configs > 8)
921 num_pipe_configs = 8; /* ??? */
923 if (num_pipe_configs == 8) {
924 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
925 switch (reg_offset) {
927 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
933 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
934 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
935 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
936 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
939 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
940 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
942 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
945 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
946 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
947 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
948 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
951 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
954 TILE_SPLIT(split_equal_to_row_size));
957 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
961 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
967 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 TILE_SPLIT(split_equal_to_row_size));
973 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
977 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
981 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
982 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
987 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
988 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
989 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
993 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
999 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1000 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1009 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1011 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1015 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1022 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1025 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1026 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1032 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1038 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1047 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1049 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1050 switch (reg_offset) {
1052 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1053 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1054 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1055 NUM_BANKS(ADDR_SURF_16_BANK));
1058 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1061 NUM_BANKS(ADDR_SURF_16_BANK));
1064 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1067 NUM_BANKS(ADDR_SURF_16_BANK));
1070 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1071 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1072 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1073 NUM_BANKS(ADDR_SURF_16_BANK));
1076 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1079 NUM_BANKS(ADDR_SURF_8_BANK));
1082 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1085 NUM_BANKS(ADDR_SURF_4_BANK));
1088 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1091 NUM_BANKS(ADDR_SURF_2_BANK));
1094 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1097 NUM_BANKS(ADDR_SURF_16_BANK));
1100 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1101 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1102 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1103 NUM_BANKS(ADDR_SURF_16_BANK));
1106 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1109 NUM_BANKS(ADDR_SURF_16_BANK));
1112 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1115 NUM_BANKS(ADDR_SURF_16_BANK));
1118 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1121 NUM_BANKS(ADDR_SURF_8_BANK));
1124 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1127 NUM_BANKS(ADDR_SURF_4_BANK));
1130 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1133 NUM_BANKS(ADDR_SURF_2_BANK));
1139 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1141 } else if (num_pipe_configs == 4) {
1143 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1144 switch (reg_offset) {
1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1148 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1152 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1154 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1160 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1165 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1166 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1170 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1172 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1173 TILE_SPLIT(split_equal_to_row_size));
1176 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1177 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1186 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1188 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189 TILE_SPLIT(split_equal_to_row_size));
1192 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1196 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1200 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1201 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1202 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1206 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1207 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1208 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1212 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1214 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1218 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1219 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1222 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1224 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1228 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1230 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1234 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1235 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1236 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1240 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1244 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1245 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1246 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1250 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1252 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1256 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1258 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1266 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1268 } else if (num_rbs < 4) {
1269 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1270 switch (reg_offset) {
1272 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1274 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1278 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1280 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1284 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1286 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1287 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1290 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1292 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1296 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1298 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1299 TILE_SPLIT(split_equal_to_row_size));
1302 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1306 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1312 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315 TILE_SPLIT(split_equal_to_row_size));
1318 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1322 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1326 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1328 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1334 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1338 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1340 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1344 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1350 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1354 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1355 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1356 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1360 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1362 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1366 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1372 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1376 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1377 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1378 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1382 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1384 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1392 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1395 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1396 switch (reg_offset) {
1398 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1401 NUM_BANKS(ADDR_SURF_16_BANK));
1404 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1407 NUM_BANKS(ADDR_SURF_16_BANK));
1410 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1413 NUM_BANKS(ADDR_SURF_16_BANK));
1416 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1419 NUM_BANKS(ADDR_SURF_16_BANK));
1422 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1425 NUM_BANKS(ADDR_SURF_16_BANK));
1428 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1431 NUM_BANKS(ADDR_SURF_8_BANK));
1434 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1437 NUM_BANKS(ADDR_SURF_4_BANK));
1440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1443 NUM_BANKS(ADDR_SURF_16_BANK));
1446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1449 NUM_BANKS(ADDR_SURF_16_BANK));
1452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1455 NUM_BANKS(ADDR_SURF_16_BANK));
1458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1461 NUM_BANKS(ADDR_SURF_16_BANK));
1464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1467 NUM_BANKS(ADDR_SURF_16_BANK));
1470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1473 NUM_BANKS(ADDR_SURF_8_BANK));
1476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1479 NUM_BANKS(ADDR_SURF_4_BANK));
1485 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1487 } else if (num_pipe_configs == 2) {
1488 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1489 switch (reg_offset) {
1491 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1492 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1493 PIPE_CONFIG(ADDR_SURF_P2) |
1494 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1497 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1498 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1499 PIPE_CONFIG(ADDR_SURF_P2) |
1500 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1503 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1504 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1505 PIPE_CONFIG(ADDR_SURF_P2) |
1506 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1509 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1510 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1511 PIPE_CONFIG(ADDR_SURF_P2) |
1512 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1515 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1517 PIPE_CONFIG(ADDR_SURF_P2) |
1518 TILE_SPLIT(split_equal_to_row_size));
1521 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1525 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527 PIPE_CONFIG(ADDR_SURF_P2) |
1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1531 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533 PIPE_CONFIG(ADDR_SURF_P2) |
1534 TILE_SPLIT(split_equal_to_row_size));
1537 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1541 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1544 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1545 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1546 PIPE_CONFIG(ADDR_SURF_P2) |
1547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1551 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1552 PIPE_CONFIG(ADDR_SURF_P2) |
1553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1556 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1558 PIPE_CONFIG(ADDR_SURF_P2) |
1559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1562 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1566 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1568 PIPE_CONFIG(ADDR_SURF_P2) |
1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1572 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1574 PIPE_CONFIG(ADDR_SURF_P2) |
1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1578 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1580 PIPE_CONFIG(ADDR_SURF_P2) |
1581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1584 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1585 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1588 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1589 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1590 PIPE_CONFIG(ADDR_SURF_P2) |
1591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1594 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1595 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1596 PIPE_CONFIG(ADDR_SURF_P2) |
1597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1600 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1601 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1602 PIPE_CONFIG(ADDR_SURF_P2) |
1603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1610 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1612 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1613 switch (reg_offset) {
1615 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1616 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1617 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1618 NUM_BANKS(ADDR_SURF_16_BANK));
1621 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1624 NUM_BANKS(ADDR_SURF_16_BANK));
1627 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1630 NUM_BANKS(ADDR_SURF_16_BANK));
1633 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1636 NUM_BANKS(ADDR_SURF_16_BANK));
1639 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1642 NUM_BANKS(ADDR_SURF_16_BANK));
1645 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1648 NUM_BANKS(ADDR_SURF_16_BANK));
1651 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1654 NUM_BANKS(ADDR_SURF_8_BANK));
1657 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1660 NUM_BANKS(ADDR_SURF_16_BANK));
1663 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1666 NUM_BANKS(ADDR_SURF_16_BANK));
1669 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1672 NUM_BANKS(ADDR_SURF_16_BANK));
1675 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1678 NUM_BANKS(ADDR_SURF_16_BANK));
1681 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1684 NUM_BANKS(ADDR_SURF_16_BANK));
1687 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1690 NUM_BANKS(ADDR_SURF_16_BANK));
1693 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1696 NUM_BANKS(ADDR_SURF_8_BANK));
1702 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1705 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1709 * cik_select_se_sh - select which SE, SH to address
1711 * @rdev: radeon_device pointer
1712 * @se_num: shader engine to address
1713 * @sh_num: sh block to address
1715 * Select which SE, SH combinations to address. Certain
1716 * registers are instanced per SE or SH. 0xffffffff means
1717 * broadcast to all SEs or SHs (CIK).
1719 static void cik_select_se_sh(struct radeon_device *rdev,
1720 u32 se_num, u32 sh_num)
1722 u32 data = INSTANCE_BROADCAST_WRITES;
1724 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1725 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1726 else if (se_num == 0xffffffff)
1727 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1728 else if (sh_num == 0xffffffff)
1729 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1731 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1732 WREG32(GRBM_GFX_INDEX, data);
1736 * cik_create_bitmask - create a bitmask
1738 * @bit_width: length of the mask
1740 * create a variable length bit mask (CIK).
1741 * Returns the bitmask.
1743 static u32 cik_create_bitmask(u32 bit_width)
1747 for (i = 0; i < bit_width; i++) {
 * cik_get_rb_disabled - compute the bitmask of disabled RBs
1757 * @rdev: radeon_device pointer
1758 * @max_rb_num: max RBs (render backends) for the asic
1759 * @se_num: number of SEs (shader engines) for the asic
1760 * @sh_per_se: number of SH blocks per SE for the asic
1762 * Calculates the bitmask of disabled RBs (CIK).
1763 * Returns the disabled RB bitmask.
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
	/* hardware-fused (permanently) disabled render backends */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	data &= BACKEND_DISABLE_MASK;
	/* merge in RBs disabled by driver/user request */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
	/* move the combined disable field down to bit 0 */
	data >>= BACKEND_DISABLE_SHIFT;
	/* only the bits that can belong to one SH are meaningful:
	 * max_rb_num / se_num / sh_per_se RBs per SH */
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786 * cik_setup_rb - setup the RBs on the asic
1788 * @rdev: radeon_device pointer
1789 * @se_num: number of SEs (shader engines) for the asic
1790 * @sh_per_se: number of SH blocks per SE for the asic
1791 * @max_rb_num: max RBs (render backends) for the asic
1793 * Configures per-SE/SH RB registers (CIK).
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* walk every SE/SH pair and gather its disabled-RB bits into one
	 * global bitmask, CIK_RB_BITMAP_WIDTH_PER_SH bits per SH */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
	/* restore broadcast so later writes reach all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: an RB is enabled iff its bit is absent from disabled_rbs */
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;

	/* program each SE's raster config from its 2-bit-per-RB enable map */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		for (j = 0; j < sh_per_se; j++) {
			/* low two bits of enabled_rbs select the RB_MAP value */
			switch (enabled_rbs & 3) {
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
		WREG32(PA_SC_RASTER_CONFIG, data);
	/* leave the selector in broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1844 * cik_gpu_init - setup the 3D engine
1846 * @rdev: radeon_device pointer
1848 * Configures the 3D engine and tiling configuration
1849 * registers so that the 3D engine is usable.
/* Brings up the 3D engine: fills rdev->config.cik with per-family limits,
 * clears HDP, derives the tiling/address configuration and programs a set
 * of hardware default registers.  Order of the register writes follows the
 * original bring-up sequence and should not be rearranged. */
static __unused void cik_gpu_init(struct radeon_device *rdev)
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;

	/* per-ASIC-family shader engine / pipe / CU limits */
	switch (rdev->family) {
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		/* scan-converter FIFO sizing, consumed below by PA_SC_FIFO_SIZE */
		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		/* smaller configuration: single SE, fewer pipes/CUs/backends */
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;

	/* Initialize HDP: zero 32 register groups of 0x18 bytes each,
	 * starting at 0x2c14 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* enable frame buffer access through the BIF */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size in KB from the MC column-count field,
	 * clamped to 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size in gb_addr_config to match what we computed */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
		gb_addr_config |= ROW_SIZE(0);
		gb_addr_config |= ROW_SIZE(1);
		gb_addr_config |= ROW_SIZE(2);

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
		rdev->config.cik.tile_config |= (0 << 0);
		rdev->config.cik.tile_config |= (1 << 0);
		rdev->config.cik.tile_config |= (2 << 0);
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
	/* num_banks field: 1 if the MC reports a non-zero bank config */
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address configuration into every block that consumes it
	 * (HDP, display DMIF, both SDMA engines, UVD) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* configure render backends using the limits chosen above */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	/* read-modify-write; no bits visibly changed between read and write */
	tmp = RREG32(SPI_CONFIG_CNTL);
	WREG32(SPI_CONFIG_CNTL, tmp);

	/* NOTE(review): SQ_CONFIG is written to 1 here and 0 further down —
	 * confirm both writes are intentional */
	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan-converter FIFO sizes from the per-family config above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-back/write-back of the HDP host path control register */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053 * GPU scratch registers helpers function.
2056 * cik_scratch_init - setup driver info for CP scratch regs
2058 * @rdev: radeon_device pointer
2060 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
2062 * is not used by default on newer asics (r6xx+). On newer asics,
2063 * memory buffers are used for fences rather than scratch regs.
2065 static __unused void cik_scratch_init(struct radeon_device *rdev)
2069 rdev->scratch.num_reg = 7;
2070 rdev->scratch.reg_base = SCRATCH_REG0;
2071 for (i = 0; i < rdev->scratch.num_reg; i++) {
2072 rdev->scratch.free[i] = true;
2073 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2078 * cik_ring_test - basic gfx ring test
2080 * @rdev: radeon_device pointer
2081 * @ring: radeon_ring structure holding ring information
2083 * Allocate a scratch register and write to it using the gfx ring (CIK).
2084 * Provides a basic gfx ring test to verify that the ring is working.
2085 * Used by cik_cp_gfx_resume();
2086 * Returns 0 on success, error on failure.
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
	/* seed the scratch reg so we can detect the CP's write */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
	/* 3-dword SET_UCONFIG_REG packet: ask the CP to write 0xDEADBEEF
	 * into the scratch register (register index is in dwords) */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll (up to usec_timeout iterations) for the CP's write to land */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
	radeon_scratch_free(rdev, scratch);
2130 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2132 * @rdev: radeon_device pointer
2133 * @fence: radeon fence object
 * Emits a fence sequence number on the gfx ring and flushes
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	/* destination address, low dword (must be 4-byte aligned) */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* upper address bits plus DATA_SEL(1)/INT_SEL(2) control fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	/* WRITE_DATA of 0 to HDP_MEM_COHERENCY_FLUSH_CNTL to flush HDP */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
2167 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2169 * @rdev: radeon_device pointer
2170 * @fence: radeon fence object
2172 * Emits a fence sequnce number on the compute ring and flushes
2175 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2176 struct radeon_fence *fence)
2178 struct radeon_ring *ring = &rdev->ring[fence->ring];
2179 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2181 /* RELEASE_MEM - flush caches, send int */
2182 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2183 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2185 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2187 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2188 radeon_ring_write(ring, addr & 0xfffffffc);
2189 radeon_ring_write(ring, upper_32_bits(addr));
2190 radeon_ring_write(ring, fence->seq);
2191 radeon_ring_write(ring, 0);
2193 /* We should be using the new WAIT_REG_MEM special op packet here
2194 * but it causes the CP to hang
2196 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2197 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2198 WRITE_DATA_DST_SEL(0)));
2199 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2200 radeon_ring_write(ring, 0);
2201 radeon_ring_write(ring, 0);
2204 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2205 struct radeon_ring *ring,
2206 struct radeon_semaphore *semaphore,
2209 uint64_t addr = semaphore->gpu_addr;
2210 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2212 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2213 radeon_ring_write(ring, addr & 0xffffffff);
2214 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2221 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2223 * @rdev: radeon_device pointer
2224 * @ib: radeon indirect buffer object
2226 * Emits an DE (drawing engine) or CE (constant engine) IB
2227 * on the gfx ring. IBs are usually generated by userspace
2228 * acceleration drivers and submitted to the kernel for
2229 * sheduling on the ring. This function schedules the IB
2230 * on the gfx ring for execution by the GPU.
2232 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2234 struct radeon_ring *ring = &rdev->ring[ib->ring];
2235 u32 header, control = INDIRECT_BUFFER_VALID;
2237 if (ib->is_const_ib) {
2238 /* set switch buffer packet before const IB */
2239 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2240 radeon_ring_write(ring, 0);
2242 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2245 if (ring->rptr_save_reg) {
2246 next_rptr = ring->wptr + 3 + 4;
2247 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2248 radeon_ring_write(ring, ((ring->rptr_save_reg -
2249 PACKET3_SET_UCONFIG_REG_START) >> 2));
2250 radeon_ring_write(ring, next_rptr);
2251 } else if (rdev->wb.enabled) {
2252 next_rptr = ring->wptr + 5 + 4;
2253 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2254 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2255 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2256 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2257 radeon_ring_write(ring, next_rptr);
2260 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2263 control |= ib->length_dw |
2264 (ib->vm ? (ib->vm->id << 24) : 0);
2266 radeon_ring_write(ring, header);
2267 radeon_ring_write(ring,
2271 (ib->gpu_addr & 0xFFFFFFFC));
2272 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2273 radeon_ring_write(ring, control);
2277 * cik_ib_test - basic gfx ring IB test
2279 * @rdev: radeon_device pointer
2280 * @ring: radeon_ring structure holding ring information
2282 * Allocate an IB and execute it on the gfx ring (CIK).
2283 * Provides a basic gfx ring test to verify that IBs are working.
2284 * Returns 0 on success, error on failure.
2286 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2288 struct radeon_ib ib;
2294 r = radeon_scratch_get(rdev, &scratch);
2296 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2299 WREG32(scratch, 0xCAFEDEAD);
2300 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2302 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2305 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2306 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2307 ib.ptr[2] = 0xDEADBEEF;
2309 r = radeon_ib_schedule(rdev, &ib, NULL);
2311 radeon_scratch_free(rdev, scratch);
2312 radeon_ib_free(rdev, &ib);
2313 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2316 r = radeon_fence_wait(ib.fence, false);
2318 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2321 for (i = 0; i < rdev->usec_timeout; i++) {
2322 tmp = RREG32(scratch);
2323 if (tmp == 0xDEADBEEF)
2327 if (i < rdev->usec_timeout) {
2328 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2330 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2334 radeon_scratch_free(rdev, scratch);
2335 radeon_ib_free(rdev, &ib);
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
2363 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2365 * @rdev: radeon_device pointer
2366 * @enable: enable or disable the MEs
2368 * Halts or unhalts the gfx MEs.
2370 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2373 WREG32(CP_ME_CNTL, 0);
2375 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2376 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2384 * @rdev: radeon_device pointer
2386 * Loads the gfx PFP, ME, and CE ucode.
2387 * Returns 0 for success, -EINVAL if the ucode is not available.
2389 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2391 const __be32 *fw_data;
2394 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2397 cik_cp_gfx_enable(rdev, false);
2400 fw_data = (const __be32 *)rdev->pfp_fw->data;
2401 WREG32(CP_PFP_UCODE_ADDR, 0);
2402 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2403 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2404 WREG32(CP_PFP_UCODE_ADDR, 0);
2407 fw_data = (const __be32 *)rdev->ce_fw->data;
2408 WREG32(CP_CE_UCODE_ADDR, 0);
2409 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2410 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2411 WREG32(CP_CE_UCODE_ADDR, 0);
2414 fw_data = (const __be32 *)rdev->me_fw->data;
2415 WREG32(CP_ME_RAM_WADDR, 0);
2416 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2417 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2418 WREG32(CP_ME_RAM_WADDR, 0);
2420 WREG32(CP_PFP_UCODE_ADDR, 0);
2421 WREG32(CP_CE_UCODE_ADDR, 0);
2422 WREG32(CP_ME_RAM_WADDR, 0);
2423 WREG32(CP_ME_RAM_RADDR, 0);
2428 * cik_cp_gfx_start - start the gfx ring
2430 * @rdev: radeon_device pointer
2432 * Enables the ring and loads the clear state context and other
2433 * packets required to init the ring.
2434 * Returns 0 for success, error for failure.
2436 static int cik_cp_gfx_start(struct radeon_device *rdev)
2438 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2442 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2443 WREG32(CP_ENDIAN_SWAP, 0);
2444 WREG32(CP_DEVICE_ID, 1);
2446 cik_cp_gfx_enable(rdev, true);
2448 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2450 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2454 /* init the CE partitions. CE only used for gfx on CIK */
2455 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2456 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2457 radeon_ring_write(ring, 0xc000);
2458 radeon_ring_write(ring, 0xc000);
2460 /* setup clear context state */
2461 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2462 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2464 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2465 radeon_ring_write(ring, 0x80000000);
2466 radeon_ring_write(ring, 0x80000000);
2468 for (i = 0; i < cik_default_size; i++)
2469 radeon_ring_write(ring, cik_default_state[i]);
2471 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2472 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2474 /* set clear context state */
2475 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2476 radeon_ring_write(ring, 0);
2478 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2479 radeon_ring_write(ring, 0x00000316);
2480 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2481 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2483 radeon_ring_unlock_commit(rdev, ring);
2489 * cik_cp_gfx_fini - stop the gfx ring
2491 * @rdev: radeon_device pointer
2493 * Stop the gfx ring and tear down the driver ring
2496 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2498 cik_cp_gfx_enable(rdev, false);
2499 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2503 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2505 * @rdev: radeon_device pointer
2507 * Program the location and size of the gfx ring buffer
2508 * and test it to make sure it's working.
2509 * Returns 0 for success, error for failure.
2511 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2513 struct radeon_ring *ring;
2519 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2520 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2522 /* Set the write pointer delay */
2523 WREG32(CP_RB_WPTR_DELAY, 0);
2525 /* set the RB to use vmid 0 */
2526 WREG32(CP_RB_VMID, 0);
2528 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2530 /* ring 0 - compute and gfx */
2531 /* Set ring buffer size */
2532 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2533 rb_bufsz = drm_order(ring->ring_size / 8);
2534 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2536 tmp |= BUF_SWAP_32BIT;
2538 WREG32(CP_RB0_CNTL, tmp);
2540 /* Initialize the ring buffer's read and write pointers */
2541 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2543 WREG32(CP_RB0_WPTR, ring->wptr);
2545 /* set the wb address wether it's enabled or not */
2546 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2547 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2549 /* scratch register shadowing is no longer supported */
2550 WREG32(SCRATCH_UMSK, 0);
2552 if (!rdev->wb.enabled)
2553 tmp |= RB_NO_UPDATE;
2556 WREG32(CP_RB0_CNTL, tmp);
2558 rb_addr = ring->gpu_addr >> 8;
2559 WREG32(CP_RB0_BASE, rb_addr);
2560 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2562 ring->rptr = RREG32(CP_RB0_RPTR);
2564 /* start the ring */
2565 cik_cp_gfx_start(rdev);
2566 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2567 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2569 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2575 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2576 struct radeon_ring *ring)
2582 if (rdev->wb.enabled) {
2583 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2585 spin_lock(&rdev->srbm_mutex);
2586 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2587 rptr = RREG32(CP_HQD_PQ_RPTR);
2588 cik_srbm_select(rdev, 0, 0, 0, 0);
2589 spin_unlock(&rdev->srbm_mutex);
2591 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2596 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2597 struct radeon_ring *ring)
2601 if (rdev->wb.enabled) {
2602 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2604 spin_lock(&rdev->srbm_mutex);
2605 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2606 wptr = RREG32(CP_HQD_PQ_WPTR);
2607 cik_srbm_select(rdev, 0, 0, 0, 0);
2608 spin_unlock(&rdev->srbm_mutex);
2610 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2615 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2616 struct radeon_ring *ring)
2618 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2620 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2621 WDOORBELL32(ring->doorbell_offset, wptr);
2625 * cik_cp_compute_enable - enable/disable the compute CP MEs
2627 * @rdev: radeon_device pointer
2628 * @enable: enable or disable the MEs
2630 * Halts or unhalts the compute MEs.
2632 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2635 WREG32(CP_MEC_CNTL, 0);
2637 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2642 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2644 * @rdev: radeon_device pointer
2646 * Loads the compute MEC1&2 ucode.
2647 * Returns 0 for success, -EINVAL if the ucode is not available.
2649 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2651 const __be32 *fw_data;
2657 cik_cp_compute_enable(rdev, false);
2660 fw_data = (const __be32 *)rdev->mec_fw->data;
2661 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2662 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2663 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2664 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2666 if (rdev->family == CHIP_KAVERI) {
2668 fw_data = (const __be32 *)rdev->mec_fw->data;
2669 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2670 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2671 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2672 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2679 * cik_cp_compute_start - start the compute queues
2681 * @rdev: radeon_device pointer
2683 * Enable the compute queues.
2684 * Returns 0 for success, error for failure.
2686 static int cik_cp_compute_start(struct radeon_device *rdev)
2688 cik_cp_compute_enable(rdev, true);
2694 * cik_cp_compute_fini - stop the compute queues
2696 * @rdev: radeon_device pointer
2698 * Stop the compute queues and tear down the driver queue
2701 static void cik_cp_compute_fini(struct radeon_device *rdev)
2705 cik_cp_compute_enable(rdev, false);
2707 for (i = 0; i < 2; i++) {
2709 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2711 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2713 if (rdev->ring[idx].mqd_obj) {
2714 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2715 if (unlikely(r != 0))
2716 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2718 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2719 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2721 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2722 rdev->ring[idx].mqd_obj = NULL;
2727 static void cik_mec_fini(struct radeon_device *rdev)
2731 if (rdev->mec.hpd_eop_obj) {
2732 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2733 if (unlikely(r != 0))
2734 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2735 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2736 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2738 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2739 rdev->mec.hpd_eop_obj = NULL;
2743 #define MEC_HPD_SIZE 2048
2745 static int cik_mec_init(struct radeon_device *rdev)
2751 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2752 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2754 if (rdev->family == CHIP_KAVERI)
2755 rdev->mec.num_mec = 2;
2757 rdev->mec.num_mec = 1;
2758 rdev->mec.num_pipe = 4;
2759 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2761 if (rdev->mec.hpd_eop_obj == NULL) {
2762 r = radeon_bo_create(rdev,
2763 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2765 RADEON_GEM_DOMAIN_GTT, NULL,
2766 &rdev->mec.hpd_eop_obj);
2768 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2773 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2774 if (unlikely(r != 0)) {
2778 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2779 &rdev->mec.hpd_eop_gpu_addr);
2781 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2785 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2787 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2792 /* clear memory. Not sure if this is required or not */
2793 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2795 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2796 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2801 struct hqd_registers
2803 u32 cp_mqd_base_addr;
2804 u32 cp_mqd_base_addr_hi;
2807 u32 cp_hqd_persistent_state;
2808 u32 cp_hqd_pipe_priority;
2809 u32 cp_hqd_queue_priority;
2812 u32 cp_hqd_pq_base_hi;
2814 u32 cp_hqd_pq_rptr_report_addr;
2815 u32 cp_hqd_pq_rptr_report_addr_hi;
2816 u32 cp_hqd_pq_wptr_poll_addr;
2817 u32 cp_hqd_pq_wptr_poll_addr_hi;
2818 u32 cp_hqd_pq_doorbell_control;
2820 u32 cp_hqd_pq_control;
2821 u32 cp_hqd_ib_base_addr;
2822 u32 cp_hqd_ib_base_addr_hi;
2824 u32 cp_hqd_ib_control;
2825 u32 cp_hqd_iq_timer;
2827 u32 cp_hqd_dequeue_request;
2828 u32 cp_hqd_dma_offload;
2829 u32 cp_hqd_sema_cmd;
2830 u32 cp_hqd_msg_type;
2831 u32 cp_hqd_atomic0_preop_lo;
2832 u32 cp_hqd_atomic0_preop_hi;
2833 u32 cp_hqd_atomic1_preop_lo;
2834 u32 cp_hqd_atomic1_preop_hi;
2835 u32 cp_hqd_hq_scheduler0;
2836 u32 cp_hqd_hq_scheduler1;
2843 u32 dispatch_initiator;
2847 u32 pipeline_stat_enable;
2848 u32 perf_counter_enable;
2854 u32 resource_limits;
2855 u32 static_thread_mgmt01[2];
2857 u32 static_thread_mgmt23[2];
2859 u32 thread_trace_enable;
2862 u32 vgtcs_invoke_count[2];
2863 struct hqd_registers queue_state;
2865 u32 interrupt_queue[64];
2869 * cik_cp_compute_resume - setup the compute queue registers
2871 * @rdev: radeon_device pointer
2873 * Program the compute queues and test them to make sure they
2875 * Returns 0 for success, error for failure.
2877 static int cik_cp_compute_resume(struct radeon_device *rdev)
2881 bool use_doorbell = true;
2887 struct bonaire_mqd *mqd;
2889 r = cik_cp_compute_start(rdev);
2893 /* fix up chicken bits */
2894 tmp = RREG32(CP_CPF_DEBUG);
2896 WREG32(CP_CPF_DEBUG, tmp);
2898 /* init the pipes */
2899 spin_lock(&rdev->srbm_mutex);
2900 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901 int me = (i < 4) ? 1 : 2;
2902 int pipe = (i < 4) ? i : (i - 4);
2904 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2906 cik_srbm_select(rdev, me, pipe, 0, 0);
2908 /* write the EOP addr */
2909 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2912 /* set the VMID assigned */
2913 WREG32(CP_HPD_EOP_VMID, 0);
2915 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917 tmp &= ~EOP_SIZE_MASK;
2918 tmp |= drm_order(MEC_HPD_SIZE / 8);
2919 WREG32(CP_HPD_EOP_CONTROL, tmp);
2921 cik_srbm_select(rdev, 0, 0, 0, 0);
2922 spin_unlock(&rdev->srbm_mutex);
2924 /* init the queues. Just two for now. */
2925 for (i = 0; i < 2; i++) {
2927 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2929 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2931 if (rdev->ring[idx].mqd_obj == NULL) {
2932 r = radeon_bo_create(rdev,
2933 sizeof(struct bonaire_mqd),
2935 RADEON_GEM_DOMAIN_GTT, NULL,
2936 &rdev->ring[idx].mqd_obj);
2938 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2943 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2944 if (unlikely(r != 0)) {
2945 cik_cp_compute_fini(rdev);
2948 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2951 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2952 cik_cp_compute_fini(rdev);
2955 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2957 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2958 cik_cp_compute_fini(rdev);
2962 /* doorbell offset */
2963 rdev->ring[idx].doorbell_offset =
2964 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2966 /* init the mqd struct */
2967 memset(buf, 0, sizeof(struct bonaire_mqd));
2969 mqd = (struct bonaire_mqd *)buf;
2970 mqd->header = 0xC0310800;
2971 mqd->static_thread_mgmt01[0] = 0xffffffff;
2972 mqd->static_thread_mgmt01[1] = 0xffffffff;
2973 mqd->static_thread_mgmt23[0] = 0xffffffff;
2974 mqd->static_thread_mgmt23[1] = 0xffffffff;
2976 spin_lock(&rdev->srbm_mutex);
2977 cik_srbm_select(rdev, rdev->ring[idx].me,
2978 rdev->ring[idx].pipe,
2979 rdev->ring[idx].queue, 0);
2981 /* disable wptr polling */
2982 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2983 tmp &= ~WPTR_POLL_EN;
2984 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2986 /* enable doorbell? */
2987 mqd->queue_state.cp_hqd_pq_doorbell_control =
2988 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2990 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2992 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2993 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2994 mqd->queue_state.cp_hqd_pq_doorbell_control);
2996 /* disable the queue if it's active */
2997 mqd->queue_state.cp_hqd_dequeue_request = 0;
2998 mqd->queue_state.cp_hqd_pq_rptr = 0;
2999 mqd->queue_state.cp_hqd_pq_wptr= 0;
3000 if (RREG32(CP_HQD_ACTIVE) & 1) {
3001 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3002 for (i = 0; i < rdev->usec_timeout; i++) {
3003 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3007 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3008 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3009 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3012 /* set the pointer to the MQD */
3013 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3014 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3015 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3016 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3017 /* set MQD vmid to 0 */
3018 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3019 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3020 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3022 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3023 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3024 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3025 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3026 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3027 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3029 /* set up the HQD, this is similar to CP_RB0_CNTL */
3030 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3031 mqd->queue_state.cp_hqd_pq_control &=
3032 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3034 mqd->queue_state.cp_hqd_pq_control |=
3035 drm_order(rdev->ring[idx].ring_size / 8);
3036 mqd->queue_state.cp_hqd_pq_control |=
3037 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3039 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3041 mqd->queue_state.cp_hqd_pq_control &=
3042 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3043 mqd->queue_state.cp_hqd_pq_control |=
3044 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3045 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3047 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3049 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3051 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3052 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3053 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3054 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3055 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3056 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3058 /* set the wb address wether it's enabled or not */
3060 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3062 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3063 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3064 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3065 upper_32_bits(wb_gpu_addr) & 0xffff;
3066 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3067 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3068 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3069 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3071 /* enable the doorbell if requested */
3073 mqd->queue_state.cp_hqd_pq_doorbell_control =
3074 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3075 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3076 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3077 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3078 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3079 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3080 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3083 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3085 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3086 mqd->queue_state.cp_hqd_pq_doorbell_control);
3088 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3089 rdev->ring[idx].wptr = 0;
3090 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3091 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3092 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3093 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3095 /* set the vmid for the queue */
3096 mqd->queue_state.cp_hqd_vmid = 0;
3097 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3099 /* activate the queue */
3100 mqd->queue_state.cp_hqd_active = 1;
3101 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3103 cik_srbm_select(rdev, 0, 0, 0, 0);
3104 spin_unlock(&rdev->srbm_mutex);
3106 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3107 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3109 rdev->ring[idx].ready = true;
3110 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3112 rdev->ring[idx].ready = false;
3118 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3120 cik_cp_gfx_enable(rdev, enable);
3121 cik_cp_compute_enable(rdev, enable);
/* Load microcode for both the gfx and compute command processors.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
3144 static int cik_cp_resume(struct radeon_device *rdev)
3148 /* Reset all cp blocks */
3149 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3150 RREG32(GRBM_SOFT_RESET);
3152 WREG32(GRBM_SOFT_RESET, 0);
3153 RREG32(GRBM_SOFT_RESET);
3155 r = cik_cp_load_microcode(rdev);
3159 r = cik_cp_gfx_resume(rdev);
3162 r = cik_cp_compute_resume(rdev);
/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
3186 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3188 * @rdev: radeon_device pointer
3189 * @ib: IB object to schedule
3191 * Schedule an IB in the DMA ring (CIK).
3193 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3194 struct radeon_ib *ib)
3196 struct radeon_ring *ring = &rdev->ring[ib->ring];
3197 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3199 if (rdev->wb.enabled) {
3200 u32 next_rptr = ring->wptr + 5;
3201 while ((next_rptr & 7) != 4)
3204 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3205 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3206 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3207 radeon_ring_write(ring, 1); /* number of DWs to follow */
3208 radeon_ring_write(ring, next_rptr);
3211 /* IB packet must end on a 8 DW boundary */
3212 while ((ring->wptr & 7) != 4)
3213 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3214 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3215 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3216 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3217 radeon_ring_write(ring, ib->length_dw);
3222 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3224 * @rdev: radeon_device pointer
3225 * @fence: radeon fence object
3227 * Add a DMA fence packet to the ring to write
3228 * the fence seq number and DMA trap packet to generate
3229 * an interrupt if needed (CIK).
3231 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3232 struct radeon_fence *fence)
3234 struct radeon_ring *ring = &rdev->ring[fence->ring];
3235 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3236 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3237 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3240 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3241 ref_and_mask = SDMA0;
3243 ref_and_mask = SDMA1;
3245 /* write the fence */
3246 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3247 radeon_ring_write(ring, addr & 0xffffffff);
3248 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3249 radeon_ring_write(ring, fence->seq);
3250 /* generate an interrupt */
3251 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3253 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3254 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3255 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3256 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3257 radeon_ring_write(ring, ref_and_mask); /* MASK */
3258 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3262 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3264 * @rdev: radeon_device pointer
3265 * @ring: radeon_ring structure holding ring information
3266 * @semaphore: radeon semaphore object
3267 * @emit_wait: wait or signal semaphore
3269 * Add a DMA semaphore packet to the ring wait on or signal
3270 * other rings (CIK).
3272 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3273 struct radeon_ring *ring,
3274 struct radeon_semaphore *semaphore,
3277 u64 addr = semaphore->gpu_addr;
3278 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3280 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3281 radeon_ring_write(ring, addr & 0xfffffff8);
3282 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3286 * cik_sdma_gfx_stop - stop the gfx async dma engines
3288 * @rdev: radeon_device pointer
3290 * Stop the gfx async dma ring buffers (CIK).
3292 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3294 u32 rb_cntl, reg_offset;
3297 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3299 for (i = 0; i < 2; i++) {
3301 reg_offset = SDMA0_REGISTER_OFFSET;
3303 reg_offset = SDMA1_REGISTER_OFFSET;
3304 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3305 rb_cntl &= ~SDMA_RB_ENABLE;
3306 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3307 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}
3324 * cik_sdma_enable - stop the async dma engines
3326 * @rdev: radeon_device pointer
3327 * @enable: enable/disable the DMA MEs.
3329 * Halt or unhalt the async dma engines (CIK).
3331 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3333 u32 me_cntl, reg_offset;
3336 for (i = 0; i < 2; i++) {
3338 reg_offset = SDMA0_REGISTER_OFFSET;
3340 reg_offset = SDMA1_REGISTER_OFFSET;
3341 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3343 me_cntl &= ~SDMA_HALT;
3345 me_cntl |= SDMA_HALT;
3346 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3351 * cik_sdma_gfx_resume - setup and start the async dma engines
3353 * @rdev: radeon_device pointer
3355 * Set up the gfx DMA ring buffers and enable them (CIK).
3356 * Returns 0 for success, error for failure.
3358 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3360 struct radeon_ring *ring;
3361 u32 rb_cntl, ib_cntl;
3363 u32 reg_offset, wb_offset;
3366 for (i = 0; i < 2; i++) {
3368 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3369 reg_offset = SDMA0_REGISTER_OFFSET;
3370 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3372 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3373 reg_offset = SDMA1_REGISTER_OFFSET;
3374 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3377 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3378 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3380 /* Set ring buffer size in dwords */
3381 rb_bufsz = drm_order(ring->ring_size / 4);
3382 rb_cntl = rb_bufsz << 1;
3384 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3386 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3388 /* Initialize the ring buffer's read and write pointers */
3389 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3390 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3392 /* set the wb address whether it's enabled or not */
3393 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3394 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3395 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3396 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3398 if (rdev->wb.enabled)
3399 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3401 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3402 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3405 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3407 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3410 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3412 ib_cntl = SDMA_IB_ENABLE;
3414 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3416 /* enable DMA IBs */
3417 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3421 r = radeon_ring_test(rdev, ring->idx, ring);
3423 ring->ready = false;
3428 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}
3448 * cik_sdma_load_microcode - load the sDMA ME ucode
3450 * @rdev: radeon_device pointer
3452 * Loads the sDMA0/1 ucode.
3453 * Returns 0 for success, -EINVAL if the ucode is not available.
3455 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3457 const __be32 *fw_data;
3463 /* stop the gfx rings and rlc compute queues */
3464 cik_sdma_gfx_stop(rdev);
3465 cik_sdma_rlc_stop(rdev);
3468 cik_sdma_enable(rdev, false);
3471 fw_data = (const __be32 *)rdev->sdma_fw->data;
3472 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3473 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3474 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3475 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3478 fw_data = (const __be32 *)rdev->sdma_fw->data;
3479 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3480 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3481 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3482 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3484 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3485 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3490 * cik_sdma_resume - setup and start the async dma engines
3492 * @rdev: radeon_device pointer
3494 * Set up the DMA engines and enable them (CIK).
3495 * Returns 0 for success, error for failure.
3497 static __unused int cik_sdma_resume(struct radeon_device *rdev)
3502 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3503 RREG32(SRBM_SOFT_RESET);
3505 WREG32(SRBM_SOFT_RESET, 0);
3506 RREG32(SRBM_SOFT_RESET);
3508 r = cik_sdma_load_microcode(rdev);
3512 /* unhalt the MEs */
3513 cik_sdma_enable(rdev, true);
3515 /* start the gfx rings and rlc compute queues */
3516 r = cik_sdma_gfx_resume(rdev);
3519 r = cik_sdma_rlc_resume(rdev);
3527 * cik_sdma_fini - tear down the async dma engines
3529 * @rdev: radeon_device pointer
3531 * Stop the async dma engines and free the rings (CIK).
3533 static __unused void cik_sdma_fini(struct radeon_device *rdev)
3535 /* stop the gfx rings and rlc compute queues */
3536 cik_sdma_gfx_stop(rdev);
3537 cik_sdma_rlc_stop(rdev);
3539 cik_sdma_enable(rdev, false);
3540 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3541 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3542 /* XXX - compute dma queue tear down */
3546 * cik_copy_dma - copy pages using the DMA engine
3548 * @rdev: radeon_device pointer
3549 * @src_offset: src GPU address
3550 * @dst_offset: dst GPU address
3551 * @num_gpu_pages: number of GPU pages to xfer
3552 * @fence: radeon fence object
3554 * Copy GPU paging using the DMA engine (CIK).
3555 * Used by the radeon ttm implementation to move pages if
3556 * registered as the asic copy callback.
3558 int cik_copy_dma(struct radeon_device *rdev,
3559 uint64_t src_offset, uint64_t dst_offset,
3560 unsigned num_gpu_pages,
3561 struct radeon_fence **fence)
3563 struct radeon_semaphore *sem = NULL;
3564 int ring_index = rdev->asic->copy.dma_ring_index;
3565 struct radeon_ring *ring = &rdev->ring[ring_index];
3566 u32 size_in_bytes, cur_size_in_bytes;
3570 r = radeon_semaphore_create(rdev, &sem);
3572 DRM_ERROR("radeon: moving bo (%d).\n", r);
3576 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3577 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3578 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3580 DRM_ERROR("radeon: moving bo (%d).\n", r);
3581 radeon_semaphore_free(rdev, &sem, NULL);
3585 if (radeon_fence_need_sync(*fence, ring->idx)) {
3586 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3588 radeon_fence_note_sync(*fence, ring->idx);
3590 radeon_semaphore_free(rdev, &sem, NULL);
3593 for (i = 0; i < num_loops; i++) {
3594 cur_size_in_bytes = size_in_bytes;
3595 if (cur_size_in_bytes > 0x1fffff)
3596 cur_size_in_bytes = 0x1fffff;
3597 size_in_bytes -= cur_size_in_bytes;
3598 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3599 radeon_ring_write(ring, cur_size_in_bytes);
3600 radeon_ring_write(ring, 0); /* src/dst endian swap */
3601 radeon_ring_write(ring, src_offset & 0xffffffff);
3602 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3603 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3604 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3605 src_offset += cur_size_in_bytes;
3606 dst_offset += cur_size_in_bytes;
3609 r = radeon_fence_emit(rdev, fence, ring->idx);
3611 radeon_ring_unlock_undo(rdev, ring);
3615 radeon_ring_unlock_commit(rdev, ring);
3616 radeon_semaphore_free(rdev, &sem, *fence);
3622 * cik_sdma_ring_test - simple async dma engine test
3624 * @rdev: radeon_device pointer
3625 * @ring: radeon_ring structure holding ring information
3627 * Test the DMA engine by writing using it to write an
3628 * value to memory. (CIK).
3629 * Returns 0 for success, error for failure.
3631 int cik_sdma_ring_test(struct radeon_device *rdev,
3632 struct radeon_ring *ring)
3636 volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3640 DRM_ERROR("invalid vram scratch pointer\n");
3647 r = radeon_ring_lock(rdev, ring, 4);
3649 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3652 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3653 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3654 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3655 radeon_ring_write(ring, 1); /* number of DWs to follow */
3656 radeon_ring_write(ring, 0xDEADBEEF);
3657 radeon_ring_unlock_commit(rdev, ring);
3659 for (i = 0; i < rdev->usec_timeout; i++) {
3661 if (tmp == 0xDEADBEEF)
3666 if (i < rdev->usec_timeout) {
3667 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3669 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3677 * cik_sdma_ib_test - test an IB on the DMA engine
3679 * @rdev: radeon_device pointer
3680 * @ring: radeon_ring structure holding ring information
3682 * Test a simple IB in the DMA ring (CIK).
3683 * Returns 0 on success, error on failure.
3685 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3687 struct radeon_ib ib;
3690 volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3694 DRM_ERROR("invalid vram scratch pointer\n");
3701 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3703 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3707 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3708 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3709 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3711 ib.ptr[4] = 0xDEADBEEF;
3714 r = radeon_ib_schedule(rdev, &ib, NULL);
3716 radeon_ib_free(rdev, &ib);
3717 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3720 r = radeon_fence_wait(ib.fence, false);
3722 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3725 for (i = 0; i < rdev->usec_timeout; i++) {
3727 if (tmp == 0xDEADBEEF)
3731 if (i < rdev->usec_timeout) {
3732 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3734 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3737 radeon_ib_free(rdev, &ib);
3742 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3744 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3745 RREG32(GRBM_STATUS));
3746 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3747 RREG32(GRBM_STATUS2));
3748 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3749 RREG32(GRBM_STATUS_SE0));
3750 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3751 RREG32(GRBM_STATUS_SE1));
3752 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3753 RREG32(GRBM_STATUS_SE2));
3754 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3755 RREG32(GRBM_STATUS_SE3));
3756 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3757 RREG32(SRBM_STATUS));
3758 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3759 RREG32(SRBM_STATUS2));
3760 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3761 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3762 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3763 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3764 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3765 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3766 RREG32(CP_STALLED_STAT1));
3767 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3768 RREG32(CP_STALLED_STAT2));
3769 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3770 RREG32(CP_STALLED_STAT3));
3771 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3772 RREG32(CP_CPF_BUSY_STAT));
3773 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3774 RREG32(CP_CPF_STALLED_STAT1));
3775 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3776 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3777 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3778 RREG32(CP_CPC_STALLED_STAT1));
3779 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3783 * cik_gpu_check_soft_reset - check which blocks are busy
3785 * @rdev: radeon_device pointer
3787 * Check which blocks are busy and return the relevant reset
3788 * mask to be used by cik_gpu_soft_reset().
3789 * Returns a mask of the blocks to be reset.
3791 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3797 tmp = RREG32(GRBM_STATUS);
3798 if (tmp & (PA_BUSY | SC_BUSY |
3799 BCI_BUSY | SX_BUSY |
3800 TA_BUSY | VGT_BUSY |
3802 GDS_BUSY | SPI_BUSY |
3803 IA_BUSY | IA_BUSY_NO_DMA))
3804 reset_mask |= RADEON_RESET_GFX;
3806 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3807 reset_mask |= RADEON_RESET_CP;
3810 tmp = RREG32(GRBM_STATUS2);
3812 reset_mask |= RADEON_RESET_RLC;
3814 /* SDMA0_STATUS_REG */
3815 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3816 if (!(tmp & SDMA_IDLE))
3817 reset_mask |= RADEON_RESET_DMA;
3819 /* SDMA1_STATUS_REG */
3820 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3821 if (!(tmp & SDMA_IDLE))
3822 reset_mask |= RADEON_RESET_DMA1;
3825 tmp = RREG32(SRBM_STATUS2);
3826 if (tmp & SDMA_BUSY)
3827 reset_mask |= RADEON_RESET_DMA;
3829 if (tmp & SDMA1_BUSY)
3830 reset_mask |= RADEON_RESET_DMA1;
3833 tmp = RREG32(SRBM_STATUS);
3836 reset_mask |= RADEON_RESET_IH;
3839 reset_mask |= RADEON_RESET_SEM;
3841 if (tmp & GRBM_RQ_PENDING)
3842 reset_mask |= RADEON_RESET_GRBM;
3845 reset_mask |= RADEON_RESET_VMC;
3847 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3848 MCC_BUSY | MCD_BUSY))
3849 reset_mask |= RADEON_RESET_MC;
3851 if (evergreen_is_display_hung(rdev))
3852 reset_mask |= RADEON_RESET_DISPLAY;
3854 /* Skip MC reset as it's mostly likely not hung, just busy */
3855 if (reset_mask & RADEON_RESET_MC) {
3856 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3857 reset_mask &= ~RADEON_RESET_MC;
3864 * cik_gpu_soft_reset - soft reset GPU
3866 * @rdev: radeon_device pointer
3867 * @reset_mask: mask of which blocks to reset
3869 * Soft reset the blocks specified in @reset_mask.
3871 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3873 struct evergreen_mc_save save;
3874 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3877 if (reset_mask == 0)
3880 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3882 cik_print_gpu_status_regs(rdev);
3883 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3884 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3885 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3886 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3891 /* Disable GFX parsing/prefetching */
3892 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3894 /* Disable MEC parsing/prefetching */
3895 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3897 if (reset_mask & RADEON_RESET_DMA) {
3899 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3901 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3903 if (reset_mask & RADEON_RESET_DMA1) {
3905 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3907 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3910 evergreen_mc_stop(rdev, &save);
3911 if (evergreen_mc_wait_for_idle(rdev)) {
3912 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3914 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3915 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3917 if (reset_mask & RADEON_RESET_CP) {
3918 grbm_soft_reset |= SOFT_RESET_CP;
3920 srbm_soft_reset |= SOFT_RESET_GRBM;
3923 if (reset_mask & RADEON_RESET_DMA)
3924 srbm_soft_reset |= SOFT_RESET_SDMA;
3926 if (reset_mask & RADEON_RESET_DMA1)
3927 srbm_soft_reset |= SOFT_RESET_SDMA1;
3929 if (reset_mask & RADEON_RESET_DISPLAY)
3930 srbm_soft_reset |= SOFT_RESET_DC;
3932 if (reset_mask & RADEON_RESET_RLC)
3933 grbm_soft_reset |= SOFT_RESET_RLC;
3935 if (reset_mask & RADEON_RESET_SEM)
3936 srbm_soft_reset |= SOFT_RESET_SEM;
3938 if (reset_mask & RADEON_RESET_IH)
3939 srbm_soft_reset |= SOFT_RESET_IH;
3941 if (reset_mask & RADEON_RESET_GRBM)
3942 srbm_soft_reset |= SOFT_RESET_GRBM;
3944 if (reset_mask & RADEON_RESET_VMC)
3945 srbm_soft_reset |= SOFT_RESET_VMC;
3947 if (!(rdev->flags & RADEON_IS_IGP)) {
3948 if (reset_mask & RADEON_RESET_MC)
3949 srbm_soft_reset |= SOFT_RESET_MC;
3952 if (grbm_soft_reset) {
3953 tmp = RREG32(GRBM_SOFT_RESET);
3954 tmp |= grbm_soft_reset;
3955 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3956 WREG32(GRBM_SOFT_RESET, tmp);
3957 tmp = RREG32(GRBM_SOFT_RESET);
3961 tmp &= ~grbm_soft_reset;
3962 WREG32(GRBM_SOFT_RESET, tmp);
3963 tmp = RREG32(GRBM_SOFT_RESET);
3966 if (srbm_soft_reset) {
3967 tmp = RREG32(SRBM_SOFT_RESET);
3968 tmp |= srbm_soft_reset;
3969 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3970 WREG32(SRBM_SOFT_RESET, tmp);
3971 tmp = RREG32(SRBM_SOFT_RESET);
3975 tmp &= ~srbm_soft_reset;
3976 WREG32(SRBM_SOFT_RESET, tmp);
3977 tmp = RREG32(SRBM_SOFT_RESET);
3980 /* Wait a little for things to settle down */
3983 evergreen_mc_resume(rdev, &save);
3986 cik_print_gpu_status_regs(rdev);
3990 * cik_asic_reset - soft reset GPU
3992 * @rdev: radeon_device pointer
3994 * Look up which blocks are hung and attempt
3996 * Returns 0 for success.
3998 int cik_asic_reset(struct radeon_device *rdev)
4002 reset_mask = cik_gpu_check_soft_reset(rdev);
4005 r600_set_bios_scratch_engine_hung(rdev, true);
4007 cik_gpu_soft_reset(rdev, reset_mask);
4009 reset_mask = cik_gpu_check_soft_reset(rdev);
4012 r600_set_bios_scratch_engine_hung(rdev, false);
4018 * cik_gfx_is_lockup - check if the 3D engine is locked up
4020 * @rdev: radeon_device pointer
4021 * @ring: radeon_ring structure holding ring information
4023 * Check if the 3D engine is locked up (CIK).
4024 * Returns true if the engine is locked, false if not.
4026 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4028 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4030 if (!(reset_mask & (RADEON_RESET_GFX |
4031 RADEON_RESET_COMPUTE |
4032 RADEON_RESET_CP))) {
4033 radeon_ring_lockup_update(ring);
4036 /* force CP activities */
4037 radeon_ring_force_activity(rdev, ring);
4038 return radeon_ring_test_lockup(rdev, ring);
4042 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4044 * @rdev: radeon_device pointer
4045 * @ring: radeon_ring structure holding ring information
4047 * Check if the async DMA engine is locked up (CIK).
4048 * Returns true if the engine appears to be locked up, false if not.
4050 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4052 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4055 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4056 mask = RADEON_RESET_DMA;
4058 mask = RADEON_RESET_DMA1;
4060 if (!(reset_mask & mask)) {
4061 radeon_ring_lockup_update(ring);
4064 /* force ring activities */
4065 radeon_ring_force_activity(rdev, ring);
4066 return radeon_ring_test_lockup(rdev, ring);
4071 * cik_mc_program - program the GPU memory controller
4073 * @rdev: radeon_device pointer
4075 * Set the location of vram, gart, and AGP in the GPU's
4076 * physical address space (CIK).
4078 static __unused void cik_mc_program(struct radeon_device *rdev)
4080 struct evergreen_mc_save save;
4084 /* Initialize HDP */
4085 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4086 WREG32((0x2c14 + j), 0x00000000);
4087 WREG32((0x2c18 + j), 0x00000000);
4088 WREG32((0x2c1c + j), 0x00000000);
4089 WREG32((0x2c20 + j), 0x00000000);
4090 WREG32((0x2c24 + j), 0x00000000);
4092 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4094 evergreen_mc_stop(rdev, &save);
4095 if (radeon_mc_wait_for_idle(rdev)) {
4096 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4098 /* Lockout access through VGA aperture*/
4099 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4100 /* Update configuration */
4101 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4102 rdev->mc.vram_start >> 12);
4103 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4104 rdev->mc.vram_end >> 12);
4105 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4106 rdev->vram_scratch.gpu_addr >> 12);
4107 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4108 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4109 WREG32(MC_VM_FB_LOCATION, tmp);
4110 /* XXX double check these! */
4111 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4112 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4113 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4114 WREG32(MC_VM_AGP_BASE, 0);
4115 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4116 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4117 if (radeon_mc_wait_for_idle(rdev)) {
4118 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4120 evergreen_mc_resume(rdev, &save);
4121 /* we need to own VRAM, so turn off the VGA renderer here
4122 * to stop it overwriting our objects */
4123 rv515_vga_render_disable(rdev);
4127 * cik_mc_init - initialize the memory controller driver params
4129 * @rdev: radeon_device pointer
4131 * Look up the amount of vram, vram width, and decide how to place
4132 * vram and gart within the GPU's physical address space (CIK).
4133 * Returns 0 for success.
4135 static __unused int cik_mc_init(struct radeon_device *rdev)
4138 int chansize, numchan;
4140 /* Get VRAM informations */
4141 rdev->mc.vram_is_ddr = true;
4142 tmp = RREG32(MC_ARB_RAMCFG);
4143 if (tmp & CHANSIZE_MASK) {
4148 tmp = RREG32(MC_SHARED_CHMAP);
4149 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4179 rdev->mc.vram_width = numchan * chansize;
4180 /* Could aper size report 0 ? */
4181 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4182 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4183 /* size in MB on si */
4184 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4185 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4186 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4187 si_vram_gtt_location(rdev, &rdev->mc);
4188 radeon_update_bandwidth_info(rdev);
4195 * VMID 0 is the physical GPU addresses as used by the kernel.
4196 * VMIDs 1-15 are used for userspace clients and are handled
4197 * by the radeon vm/hsa code.
4200 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4202 * @rdev: radeon_device pointer
4204 * Flush the TLB for the VMID 0 page table (CIK).
4206 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4208 /* flush hdp cache */
4209 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4211 /* bits 0-15 are the VM contexts0-15 */
4212 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4216 * cik_pcie_gart_enable - gart enable
4218 * @rdev: radeon_device pointer
4220 * This sets up the TLBs, programs the page tables for VMID0,
4221 * sets up the hw for VMIDs 1-15 which are allocated on
4222 * demand, and sets up the global locations for the LDS, GDS,
4223 * and GPUVM for FSA64 clients (CIK).
4224 * Returns 0 for success, errors for failure.
4226 static __unused int cik_pcie_gart_enable(struct radeon_device *rdev)
4230 if (rdev->gart.robj == NULL) {
4231 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4234 r = radeon_gart_table_vram_pin(rdev);
4237 radeon_gart_restore(rdev);
4238 /* Setup TLB control */
4239 WREG32(MC_VM_MX_L1_TLB_CNTL,
4242 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4243 ENABLE_ADVANCED_DRIVER_MODEL |
4244 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4245 /* Setup L2 cache */
4246 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4247 ENABLE_L2_FRAGMENT_PROCESSING |
4248 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4249 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4250 EFFECTIVE_L2_QUEUE_SIZE(7) |
4251 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4252 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4253 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4254 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4255 /* setup context0 */
4256 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4257 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4258 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4259 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4260 (u32)(rdev->dummy_page.addr >> 12));
4261 WREG32(VM_CONTEXT0_CNTL2, 0);
4262 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4263 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4269 /* empty context1-15 */
4270 /* FIXME start with 4G, once using 2 level pt switch to full
4273 /* set vm size, must be a multiple of 4 */
4274 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4275 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4276 for (i = 1; i < 16; i++) {
4278 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4279 rdev->gart.table_addr >> 12);
4281 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4282 rdev->gart.table_addr >> 12);
4285 /* enable context1-15 */
4286 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4287 (u32)(rdev->dummy_page.addr >> 12));
4288 WREG32(VM_CONTEXT1_CNTL2, 4);
4289 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4290 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4292 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4294 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4296 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4298 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4300 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4301 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4303 /* TC cache setup ??? */
4304 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4305 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4306 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4308 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4309 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4310 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4311 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4312 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4314 WREG32(TC_CFG_L1_VOLATILE, 0);
4315 WREG32(TC_CFG_L2_VOLATILE, 0);
4317 if (rdev->family == CHIP_KAVERI) {
4318 u32 tmp = RREG32(CHUB_CONTROL);
4320 WREG32(CHUB_CONTROL, tmp);
4323 /* XXX SH_MEM regs */
4324 /* where to put LDS, scratch, GPUVM in FSA64 space */
4325 spin_lock(&rdev->srbm_mutex);
4326 for (i = 0; i < 16; i++) {
4327 cik_srbm_select(rdev, 0, 0, 0, i);
4328 /* CP and shaders */
4329 WREG32(SH_MEM_CONFIG, 0);
4330 WREG32(SH_MEM_APE1_BASE, 1);
4331 WREG32(SH_MEM_APE1_LIMIT, 0);
4332 WREG32(SH_MEM_BASES, 0);
4334 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4335 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4336 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4337 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4338 /* XXX SDMA RLC - todo */
4340 cik_srbm_select(rdev, 0, 0, 0, 0);
4341 spin_unlock(&rdev->srbm_mutex);
4343 cik_pcie_gart_tlb_flush(rdev);
4344 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4345 (unsigned)(rdev->mc.gtt_size >> 20),
4346 (unsigned long long)rdev->gart.table_addr);
4347 rdev->gart.ready = true;
4352 * cik_pcie_gart_disable - gart disable
4354 * @rdev: radeon_device pointer
4356 * This disables all VM page table (CIK).
4358 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4360 /* Disable all tables */
4361 WREG32(VM_CONTEXT0_CNTL, 0);
4362 WREG32(VM_CONTEXT1_CNTL, 0);
4363 /* Setup TLB control */
4364 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4365 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4366 /* Setup L2 cache */
4368 ENABLE_L2_FRAGMENT_PROCESSING |
4369 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4370 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4371 EFFECTIVE_L2_QUEUE_SIZE(7) |
4372 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4373 WREG32(VM_L2_CNTL2, 0);
4374 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4375 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4376 radeon_gart_table_vram_unpin(rdev);
4380 * cik_pcie_gart_fini - vm fini callback
4382 * @rdev: radeon_device pointer
4384 * Tears down the driver GART/VM setup (CIK).
4386 static __unused void cik_pcie_gart_fini(struct radeon_device *rdev)
4388 cik_pcie_gart_disable(rdev);
4389 radeon_gart_table_vram_free(rdev);
4390 radeon_gart_fini(rdev);
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
4409 * VMID 0 is the physical GPU addresses as used by the kernel.
4410 * VMIDs 1-15 are used for userspace clients and are handled
4411 * by the radeon vm/hsa code.
4414 * cik_vm_init - cik vm init callback
4416 * @rdev: radeon_device pointer
4418 * Inits cik specific vm parameters (number of VMs, base of vram for
4419 * VMIDs 1-15) (CIK).
4420 * Returns 0 for success.
4422 int cik_vm_init(struct radeon_device *rdev)
4425 rdev->vm_manager.nvm = 16;
4426 /* base offset of vram pages */
4427 if (rdev->flags & RADEON_IS_IGP) {
4428 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4430 rdev->vm_manager.vram_base_offset = tmp;
4432 rdev->vm_manager.vram_base_offset = 0;
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
4449 * cik_vm_decode_fault - print human readable fault info
4451 * @rdev: radeon_device pointer
4452 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4453 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4455 * Print human readable fault information (CIK).
4457 static void cik_vm_decode_fault(struct radeon_device *rdev,
4458 u32 status, u32 addr, u32 mc_client)
4460 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4461 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4462 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4463 char *block = (char *)&mc_client;
4465 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4466 protections, vmid, addr,
4467 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4472 * cik_vm_flush - cik vm flush using the CP
4474 * @rdev: radeon_device pointer
4476 * Update the page table base and flush the VM TLB
4477 * using the CP (CIK).
4479 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4481 struct radeon_ring *ring = &rdev->ring[ridx];
4486 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4487 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4488 WRITE_DATA_DST_SEL(0)));
4490 radeon_ring_write(ring,
4491 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4493 radeon_ring_write(ring,
4494 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4496 radeon_ring_write(ring, 0);
4497 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4499 /* update SH_MEM_* regs */
4500 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4501 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4502 WRITE_DATA_DST_SEL(0)));
4503 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4504 radeon_ring_write(ring, 0);
4505 radeon_ring_write(ring, VMID(vm->id));
4507 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4508 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4509 WRITE_DATA_DST_SEL(0)));
4510 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4511 radeon_ring_write(ring, 0);
4513 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4514 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4515 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4516 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4518 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4519 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4520 WRITE_DATA_DST_SEL(0)));
4521 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4522 radeon_ring_write(ring, 0);
4523 radeon_ring_write(ring, VMID(0));
4526 /* We should be using the WAIT_REG_MEM packet here like in
4527 * cik_fence_ring_emit(), but it causes the CP to hang in this
4530 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4531 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4532 WRITE_DATA_DST_SEL(0)));
4533 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4534 radeon_ring_write(ring, 0);
4535 radeon_ring_write(ring, 0);
4537 /* bits 0-15 are the VM contexts0-15 */
4538 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4539 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4540 WRITE_DATA_DST_SEL(0)));
4541 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4542 radeon_ring_write(ring, 0);
4543 radeon_ring_write(ring, 1 << vm->id);
4545 /* compute doesn't have PFP */
4546 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4547 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4548 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4549 radeon_ring_write(ring, 0x0);
4554 * cik_vm_set_page - update the page tables using sDMA
4556 * @rdev: radeon_device pointer
4557 * @ib: indirect buffer to fill with commands
4558 * @pe: addr of the page entry
4559 * @addr: dst addr to write into pe
4560 * @count: number of page entries to update
4561 * @incr: increase next addr by incr bytes
4562 * @flags: access flags
4564 * Update the page tables using CP or sDMA (CIK).
4566 void cik_vm_set_page(struct radeon_device *rdev,
4567 struct radeon_ib *ib,
4569 uint64_t addr, unsigned count,
4570 uint32_t incr, uint32_t flags)
/* Translate radeon-generic page flags into the hw PTE flag bits. */
4572 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
/* CP path: the page-table ring is the GFX ring, so emit PM4 WRITE_DATA
 * packets; one header + (lo,hi) address pair, then one 64-bit PTE per
 * two payload dwords. */
4576 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4579 ndw = 2 + count * 2;
4583 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4584 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4585 WRITE_DATA_DST_SEL(1));
4586 ib->ptr[ib->length_dw++] = pe;
4587 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4588 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4589 if (flags & RADEON_VM_PAGE_SYSTEM) {
/* System page: resolve through the GART and keep only the 4K-aligned
 * physical address bits. */
4590 value = radeon_vm_map_gart(rdev, addr);
4591 value &= 0xFFFFFFFFFFFFF000ULL;
4592 } else if (flags & RADEON_VM_PAGE_VALID) {
4598 value |= r600_flags;
4599 ib->ptr[ib->length_dw++] = value;
4600 ib->ptr[ib->length_dw++] = upper_32_bits(value);
/* sDMA path below: system pages use a linear WRITE packet (one PTE per
 * two payload dwords), vram pages use GENERATE_PTE_PDE. */
4605 if (flags & RADEON_VM_PAGE_SYSTEM) {
4611 /* for non-physically contiguous pages (system) */
4612 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4613 ib->ptr[ib->length_dw++] = pe;
4614 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4615 ib->ptr[ib->length_dw++] = ndw;
4616 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4617 if (flags & RADEON_VM_PAGE_SYSTEM) {
4618 value = radeon_vm_map_gart(rdev, addr);
4619 value &= 0xFFFFFFFFFFFFF000ULL;
4620 } else if (flags & RADEON_VM_PAGE_VALID) {
4626 value |= r600_flags;
4627 ib->ptr[ib->length_dw++] = value;
4628 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4637 if (flags & RADEON_VM_PAGE_VALID)
4641 /* for physically contiguous pages (vram) */
4642 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4643 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4644 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4645 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4646 ib->ptr[ib->length_dw++] = 0;
4647 ib->ptr[ib->length_dw++] = value; /* value */
4648 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4649 ib->ptr[ib->length_dw++] = incr; /* increment size */
4650 ib->ptr[ib->length_dw++] = 0;
4651 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
/* sDMA requires the IB to be a multiple of 8 dwords; pad with NOPs. */
4657 while (ib->length_dw & 0x7)
4658 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4663 * cik_dma_vm_flush - cik vm flush using sDMA
4665 * @rdev: radeon_device pointer
4667 * Update the page table base and flush the VM TLB
4670 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4672 struct radeon_ring *ring = &rdev->ring[ridx];
/* POLL_REG_MEM extra bits: memory operand, compare function 3 (equal). */
4673 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4674 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
/* Pick the HDP-flush-done bit for whichever sDMA engine owns this ring. */
4680 if (ridx == R600_RING_TYPE_DMA_INDEX)
4681 ref_and_mask = SDMA0;
4683 ref_and_mask = SDMA1;
/* Point the VM context at the new page directory.  Contexts 0-7 and 8-15
 * live in two separate register banks, hence the split addressing. */
4685 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4689 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4691 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4693 /* update SH_MEM_* regs */
/* SRBM_GFX_CNTL selects which VMID the following SH_MEM_* writes target. */
4694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4696 radeon_ring_write(ring, VMID(vm->id));
4698 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4700 radeon_ring_write(ring, 0);
4702 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4703 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4704 radeon_ring_write(ring, 0);
4706 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4707 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4708 radeon_ring_write(ring, 1);
4710 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4711 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4712 radeon_ring_write(ring, 0);
/* Restore SRBM_GFX_CNTL to VMID 0 so later register writes are global. */
4714 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4715 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4716 radeon_ring_write(ring, VMID(0));
/* Flush HDP and poll until this engine's flush-done bit is set. */
4719 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4720 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4721 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4722 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4723 radeon_ring_write(ring, ref_and_mask); /* MASK */
4724 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
/* Finally invalidate the TLB for this VM's context bit. */
4727 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4728 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4729 radeon_ring_write(ring, 1 << vm->id);
4734 * The RLC is a multi-purpose microengine that handles a
4735 * variety of functions, the most important of which is
4736 * the interrupt controller.
4739 * cik_rlc_stop - stop the RLC ME
4741 * @rdev: radeon_device pointer
4743 * Halt the RLC ME (MicroEngine) (CIK).
4745 static void cik_rlc_stop(struct radeon_device *rdev)
/* Mask the CP context busy/empty interrupts before halting the RLC. */
4750 tmp = RREG32(CP_INT_CNTL_RING0);
4751 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4752 WREG32(CP_INT_CNTL_RING0, tmp);
/* Four dummy reads — presumably a posting/settling delay; TODO confirm
 * against the hw programming guide. */
4754 RREG32(CB_CGTT_SCLK_CTRL);
4755 RREG32(CB_CGTT_SCLK_CTRL);
4756 RREG32(CB_CGTT_SCLK_CTRL);
4757 RREG32(CB_CGTT_SCLK_CTRL);
/* Clear the low two bits of RLC_CGCG_CGLS_CTRL. */
4759 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4760 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
/* Halt the RLC microengine. */
4762 WREG32(RLC_CNTL, 0);
/* Wait for the serdes CU masters of every SE/SH to go idle, then the
 * non-CU masters, bounded by usec_timeout iterations. */
4764 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4765 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4766 cik_select_se_sh(rdev, i, j);
4767 for (k = 0; k < rdev->usec_timeout; k++) {
4768 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* Broadcast selection again before the global busy check. */
4774 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4776 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4777 for (k = 0; k < rdev->usec_timeout; k++) {
4778 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4785 * cik_rlc_start - start the RLC ME
4787 * @rdev: radeon_device pointer
4789 * Unhalt the RLC ME (MicroEngine) (CIK).
4791 static void cik_rlc_start(struct radeon_device *rdev)
/* Unhalt the RLC microengine, then re-enable the CP context
 * busy/empty interrupts that cik_rlc_stop() masked off. */
4795 WREG32(RLC_CNTL, RLC_ENABLE);
4797 tmp = RREG32(CP_INT_CNTL_RING0);
4798 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4799 WREG32(CP_INT_CNTL_RING0, tmp);
4805 * cik_rlc_resume - setup the RLC hw
4807 * @rdev: radeon_device pointer
4809 * Initialize the RLC registers, load the ucode,
4810 * and start the RLC (CIK).
4811 * Returns 0 for success, -EINVAL if the ucode is not available.
4813 static __unused int cik_rlc_resume(struct radeon_device *rdev)
4816 u32 clear_state_info[3];
4817 const __be32 *fw_data;
/* Pick the RLC ucode dword count for this ASIC family. */
4822 switch (rdev->family) {
4825 size = BONAIRE_RLC_UCODE_SIZE;
4828 size = KV_RLC_UCODE_SIZE;
4831 size = KB_RLC_UCODE_SIZE;
/* Soft-reset the RLC: assert, read back to post, deassert, post again. */
4837 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4838 RREG32(GRBM_SOFT_RESET);
4840 WREG32(GRBM_SOFT_RESET, 0);
4841 RREG32(GRBM_SOFT_RESET);
/* Load-balancer counters/parameters (magic values from the hw team). */
4844 WREG32(RLC_LB_CNTR_INIT, 0);
4845 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4847 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4848 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4849 WREG32(RLC_LB_PARAMS, 0x00600408);
4850 WREG32(RLC_LB_CNTL, 0x80000004);
4852 WREG32(RLC_MC_CNTL, 0);
4853 WREG32(RLC_UCODE_CNTL, 0);
/* Upload the big-endian RLC firmware image word by word. */
4855 fw_data = (const __be32 *)rdev->rlc_fw->data;
4856 WREG32(RLC_GPM_UCODE_ADDR, 0);
4857 for (i = 0; i < size; i++)
4858 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4859 WREG32(RLC_GPM_UCODE_ADDR, 0);
/* Clear-state save/restore buffer is deliberately disabled here (zeros
 * written instead of the save_restore_gpu_addr) — see commented values. */
4862 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4863 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4864 clear_state_info[2] = 0;//cik_default_size;
4865 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4866 for (i = 0; i < 3; i++)
4867 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4868 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4870 cik_rlc_start(rdev);
4877 * Starting with r6xx, interrupts are handled via a ring buffer.
4878 * Ring buffers are areas of GPU accessible memory that the GPU
4879 * writes interrupt vectors into and the host reads vectors out of.
4880 * There is a rptr (read pointer) that determines where the
4881 * host is currently reading, and a wptr (write pointer)
4882 * which determines where the GPU has written. When the
4883 * pointers are equal, the ring is idle. When the GPU
4884 * writes vectors to the ring buffer, it increments the
4885 * wptr. When there is an interrupt, the host then starts
4886 * fetching commands and processing them until the pointers are
4887 * equal again at which point it updates the rptr.
4891 * cik_enable_interrupts - Enable the interrupt ring buffer
4893 * @rdev: radeon_device pointer
4895 * Enable the interrupt ring buffer (CIK).
4897 static void cik_enable_interrupts(struct radeon_device *rdev)
4899 u32 ih_cntl = RREG32(IH_CNTL);
4900 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
/* Turn on the IH controller and its ring buffer, then record the
 * software-visible state so cik_irq_set()/cik_irq_process() run. */
4902 ih_cntl |= ENABLE_INTR;
4903 ih_rb_cntl |= IH_RB_ENABLE;
4904 WREG32(IH_CNTL, ih_cntl);
4905 WREG32(IH_RB_CNTL, ih_rb_cntl);
4906 rdev->ih.enabled = true;
4910 * cik_disable_interrupts - Disable the interrupt ring buffer
4912 * @rdev: radeon_device pointer
4914 * Disable the interrupt ring buffer (CIK).
4916 static void cik_disable_interrupts(struct radeon_device *rdev)
4918 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4919 u32 ih_cntl = RREG32(IH_CNTL);
/* Reverse of cik_enable_interrupts(): ring buffer off first, then the
 * IH controller itself. */
4921 ih_rb_cntl &= ~IH_RB_ENABLE;
4922 ih_cntl &= ~ENABLE_INTR;
4923 WREG32(IH_RB_CNTL, ih_rb_cntl);
4924 WREG32(IH_CNTL, ih_cntl);
4925 /* set rptr, wptr to 0 */
4926 WREG32(IH_RB_RPTR, 0);
4927 WREG32(IH_RB_WPTR, 0);
4928 rdev->ih.enabled = false;
4933 * cik_disable_interrupt_state - Disable all interrupt sources
4935 * @rdev: radeon_device pointer
4937 * Clear all interrupt enable bits used by the driver (CIK).
4939 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* gfx ring: keep only the always-on context busy/empty bits. */
4944 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE)
/* sDMA engines: clear the trap-interrupt enable on both instances. */
4946 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4947 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4948 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4949 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4950 /* compute queues */
4951 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4952 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4953 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4954 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4955 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4956 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4957 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4958 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4960 WREG32(GRBM_INT_CNTL, 0);
4961 /* vline/vblank, etc. */
4962 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4963 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4964 if (rdev->num_crtc >= 4) {
4965 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4966 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4968 if (rdev->num_crtc >= 6) {
4969 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4970 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4974 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4976 /* digital hotplug */
/* Preserve only the polarity bit in each HPD control register so the
 * pin configuration survives while the interrupt enables are cleared. */
4977 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4978 WREG32(DC_HPD1_INT_CONTROL, tmp);
4979 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4980 WREG32(DC_HPD2_INT_CONTROL, tmp);
4981 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4982 WREG32(DC_HPD3_INT_CONTROL, tmp);
4983 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984 WREG32(DC_HPD4_INT_CONTROL, tmp);
4985 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4986 WREG32(DC_HPD5_INT_CONTROL, tmp);
4987 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4988 WREG32(DC_HPD6_INT_CONTROL, tmp);
4993 * cik_irq_init - init and enable the interrupt ring
4995 * @rdev: radeon_device pointer
4997 * Allocate a ring buffer for the interrupt controller,
4998 * enable the RLC, disable interrupts, enable the IH
4999 * ring buffer and enable it (CIK).
5000 * Called at device load and resume.
5001 * Returns 0 for success, errors for failure.
5003 static __unused int cik_irq_init(struct radeon_device *rdev)
5007 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* Allocate the IH ring buffer before touching any IH registers. */
5010 ret = r600_ih_ring_alloc(rdev);
5015 cik_disable_interrupts(rdev);
/* Bring up the RLC; on failure tear the IH ring back down. */
5018 ret = cik_rlc_resume(rdev);
5020 r600_ih_ring_fini(rdev);
5024 /* setup interrupt control */
5025 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5026 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5027 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5028 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5029 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5031 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5032 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5033 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5034 WREG32(INTERRUPT_CNTL, interrupt_cntl);
/* Program the ring base (256-byte aligned) and its log2 size. */
5036 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5037 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5039 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5040 IH_WPTR_OVERFLOW_CLEAR |
5043 if (rdev->wb.enabled)
5044 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5046 /* set the writeback address whether it's enabled or not */
5047 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5048 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5050 WREG32(IH_RB_CNTL, ih_rb_cntl);
5052 /* set rptr, wptr to 0 */
5053 WREG32(IH_RB_RPTR, 0);
5054 WREG32(IH_RB_WPTR, 0);
5056 /* Default settings for IH_CNTL (disabled at first) */
5057 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5058 /* RPTR_REARM only works if msi's are enabled */
5059 if (rdev->msi_enabled)
5060 ih_cntl |= RPTR_REARM;
5061 WREG32(IH_CNTL, ih_cntl);
5063 /* force the active interrupt state to all disabled */
5064 cik_disable_interrupt_state(rdev);
/* Bus mastering must be on for MSI/IH DMA to reach host memory. */
5066 pci_enable_busmaster(rdev->dev);
5069 cik_enable_interrupts(rdev);
5075 * cik_irq_set - enable/disable interrupt sources
5077 * @rdev: radeon_device pointer
5079 * Enable interrupt sources on the GPU (vblanks, hpd,
5081 * Returns 0 for success, errors for failure.
5083 int cik_irq_set(struct radeon_device *rdev)
5085 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5086 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5087 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5088 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5089 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5090 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5091 u32 grbm_int_cntl = 0;
5092 u32 dma_cntl, dma_cntl1;
5094 if (!rdev->irq.installed) {
5095 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5098 /* don't enable anything if the ih is disabled */
5099 if (!rdev->ih.enabled) {
5100 cik_disable_interrupts(rdev);
5101 /* force the active interrupt state to all disabled */
5102 cik_disable_interrupt_state(rdev);
/* Read-modify-write staging: fetch every control register with its
 * enable bit cleared; the bits are OR-ed back in below as requested. */
5106 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5107 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5109 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5110 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5111 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5113 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5114 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5116 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5117 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5121 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5122 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5125 /* enable CP interrupts on all rings */
5126 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5127 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5128 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5130 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5131 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5132 DRM_DEBUG("si_irq_set: sw int cp1\n");
5133 if (ring->me == 1) {
5134 switch (ring->pipe) {
5136 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5139 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5142 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): pipe-3 case sets cp_m1p2, not cp_m1p3 — cp_m1p3 is
 * otherwise never enabled.  Looks like a copy-paste bug; confirm
 * against upstream before changing. */
5145 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5148 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5151 } else if (ring->me == 2) {
5152 switch (ring->pipe) {
5154 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5157 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5160 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): same pattern — pipe 3 sets cp_m2p2, not cp_m2p3. */
5163 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5166 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5170 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
5173 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5174 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5175 DRM_DEBUG("si_irq_set: sw int cp2\n");
5176 if (ring->me == 1) {
5177 switch (ring->pipe) {
5179 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5182 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5185 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): pipe 3 again sets cp_m1p2 instead of cp_m1p3. */
5188 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5191 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5194 } else if (ring->me == 2) {
5195 switch (ring->pipe) {
5197 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5200 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5203 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): pipe 3 again sets cp_m2p2 instead of cp_m2p3. */
5206 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5209 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5213 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
5217 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5218 DRM_DEBUG("cik_irq_set: sw int dma\n");
5219 dma_cntl |= TRAP_ENABLE;
5222 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5223 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5224 dma_cntl1 |= TRAP_ENABLE;
/* Per-crtc vblank enables: requested either by the vblank machinery
 * or by a pending page flip. */
5227 if (rdev->irq.crtc_vblank_int[0] ||
5228 atomic_read(&rdev->irq.pflip[0])) {
5229 DRM_DEBUG("cik_irq_set: vblank 0\n");
5230 crtc1 |= VBLANK_INTERRUPT_MASK;
5232 if (rdev->irq.crtc_vblank_int[1] ||
5233 atomic_read(&rdev->irq.pflip[1])) {
5234 DRM_DEBUG("cik_irq_set: vblank 1\n");
5235 crtc2 |= VBLANK_INTERRUPT_MASK;
5237 if (rdev->irq.crtc_vblank_int[2] ||
5238 atomic_read(&rdev->irq.pflip[2])) {
5239 DRM_DEBUG("cik_irq_set: vblank 2\n");
5240 crtc3 |= VBLANK_INTERRUPT_MASK;
5242 if (rdev->irq.crtc_vblank_int[3] ||
5243 atomic_read(&rdev->irq.pflip[3])) {
5244 DRM_DEBUG("cik_irq_set: vblank 3\n");
5245 crtc4 |= VBLANK_INTERRUPT_MASK;
5247 if (rdev->irq.crtc_vblank_int[4] ||
5248 atomic_read(&rdev->irq.pflip[4])) {
5249 DRM_DEBUG("cik_irq_set: vblank 4\n");
5250 crtc5 |= VBLANK_INTERRUPT_MASK;
5252 if (rdev->irq.crtc_vblank_int[5] ||
5253 atomic_read(&rdev->irq.pflip[5])) {
5254 DRM_DEBUG("cik_irq_set: vblank 5\n");
5255 crtc6 |= VBLANK_INTERRUPT_MASK;
5257 if (rdev->irq.hpd[0]) {
5258 DRM_DEBUG("cik_irq_set: hpd 1\n");
5259 hpd1 |= DC_HPDx_INT_EN;
5261 if (rdev->irq.hpd[1]) {
5262 DRM_DEBUG("cik_irq_set: hpd 2\n");
5263 hpd2 |= DC_HPDx_INT_EN;
5265 if (rdev->irq.hpd[2]) {
5266 DRM_DEBUG("cik_irq_set: hpd 3\n");
5267 hpd3 |= DC_HPDx_INT_EN;
5269 if (rdev->irq.hpd[3]) {
5270 DRM_DEBUG("cik_irq_set: hpd 4\n");
5271 hpd4 |= DC_HPDx_INT_EN;
5273 if (rdev->irq.hpd[4]) {
5274 DRM_DEBUG("cik_irq_set: hpd 5\n");
5275 hpd5 |= DC_HPDx_INT_EN;
5277 if (rdev->irq.hpd[5]) {
5278 DRM_DEBUG("cik_irq_set: hpd 6\n");
5279 hpd6 |= DC_HPDx_INT_EN;
/* Commit all staged values to the hardware in one pass. */
5282 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5284 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5285 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5287 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5288 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5289 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5290 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5291 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5292 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5293 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5294 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5296 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5299 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5300 if (rdev->num_crtc >= 4) {
5301 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5304 if (rdev->num_crtc >= 6) {
5305 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5306 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5309 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5310 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5311 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5312 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5313 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5314 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5320 * cik_irq_ack - ack interrupt sources
5322 * @rdev: radeon_device pointer
5324 * Ack interrupt sources on the GPU (vblanks, hpd,
5325 * etc.) (CIK). Certain interrupts sources are sw
5326 * generated and do not require an explicit ack.
5328 static inline void cik_irq_ack(struct radeon_device *rdev)
5332 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5333 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5334 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5335 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5336 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5337 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5338 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5340 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5341 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5342 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5343 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5344 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5345 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5346 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5347 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5349 if (rdev->num_crtc >= 4) {
5350 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5352 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5354 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5355 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5356 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5357 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5360 if (rdev->num_crtc >= 6) {
5361 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5362 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5363 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5364 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5365 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5366 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5367 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5368 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5371 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5372 tmp = RREG32(DC_HPD1_INT_CONTROL);
5373 tmp |= DC_HPDx_INT_ACK;
5374 WREG32(DC_HPD1_INT_CONTROL, tmp);
5376 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5377 tmp = RREG32(DC_HPD2_INT_CONTROL);
5378 tmp |= DC_HPDx_INT_ACK;
5379 WREG32(DC_HPD2_INT_CONTROL, tmp);
5381 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5382 tmp = RREG32(DC_HPD3_INT_CONTROL);
5383 tmp |= DC_HPDx_INT_ACK;
5384 WREG32(DC_HPD3_INT_CONTROL, tmp);
5386 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5387 tmp = RREG32(DC_HPD4_INT_CONTROL);
5388 tmp |= DC_HPDx_INT_ACK;
5389 WREG32(DC_HPD4_INT_CONTROL, tmp);
5391 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5392 tmp = RREG32(DC_HPD5_INT_CONTROL);
5393 tmp |= DC_HPDx_INT_ACK;
5394 WREG32(DC_HPD5_INT_CONTROL, tmp);
5396 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5397 tmp = RREG32(DC_HPD5_INT_CONTROL);
5398 tmp |= DC_HPDx_INT_ACK;
5399 WREG32(DC_HPD6_INT_CONTROL, tmp);
5404 * cik_irq_disable - disable interrupts
5406 * @rdev: radeon_device pointer
5408 * Disable interrupts on the hw (CIK).
5410 static void cik_irq_disable(struct radeon_device *rdev)
/* Turn off the IH controller first, then clear every per-source enable
 * so nothing is pending when interrupts come back. */
5412 cik_disable_interrupts(rdev);
5413 /* Wait and acknowledge irq */
5416 cik_disable_interrupt_state(rdev);
5420 * cik_irq_suspend - disable interrupts for suspend
5422 * @rdev: radeon_device pointer
5424 * Disable interrupts and stop the RLC (CIK).
5427 static void cik_irq_suspend(struct radeon_device *rdev)
/* Suspend path: full interrupt disable (the RLC stop presumably follows
 * in lines not visible here — confirm against upstream). */
5429 cik_irq_disable(rdev);
5434 * cik_irq_fini - tear down interrupt support
5436 * @rdev: radeon_device pointer
5438 * Disable interrupts on the hw and free the IH ring
5440 * Used for driver unload.
5442 static __unused void cik_irq_fini(struct radeon_device *rdev)
/* Driver unload: quiesce interrupts, then free the IH ring allocated by
 * cik_irq_init(). */
5444 cik_irq_suspend(rdev);
5445 r600_ih_ring_fini(rdev);
5449 * cik_get_ih_wptr - get the IH ring buffer wptr
5451 * @rdev: radeon_device pointer
5453 * Get the IH ring buffer wptr from either the register
5454 * or the writeback memory buffer (CIK). Also check for
5455 * ring buffer overflow and deal with it.
5456 * Used by cik_irq_process().
5457 * Returns the value of the wptr.
5459 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5463 if (rdev->wb.enabled)
5464 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5466 wptr = RREG32(IH_RB_WPTR);
5468 if (wptr & RB_OVERFLOW) {
5469 /* When a ring buffer overflow happen start parsing interrupt
5470 * from the last not overwritten vector (wptr + 16). Hopefully
5471 * this should allow us to catchup.
5473 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5474 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5475 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5476 tmp = RREG32(IH_RB_CNTL);
5477 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5478 WREG32(IH_RB_CNTL, tmp);
5480 return (wptr & rdev->ih.ptr_mask);
5484 * Each IV ring entry is 128 bits:
5485 * [7:0] - interrupt source id
5487 * [59:32] - interrupt source data
5488 * [63:60] - reserved
5491 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5492 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5493 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5494 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5495 * PIPE_ID - ME0 0=3D
5496 * - ME1&2 compute dispatcher (4 pipes each)
5498 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5499 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5500 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5503 * [127:96] - reserved
5506 * cik_irq_process - interrupt handler
5508 * @rdev: radeon_device pointer
5510 * Interrupt handler (CIK). Walk the IH ring,
5511 * ack interrupts and schedule work to handle
5513 * Returns irq process return code.
5515 irqreturn_t cik_irq_process(struct radeon_device *rdev)
5517 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5518 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5521 u32 src_id, src_data, ring_id;
5522 u8 me_id, pipe_id, queue_id;
5524 bool queue_hotplug = false;
5525 bool queue_reset = false;
5526 u32 addr, status, mc_client;
5528 if (!rdev->ih.enabled || rdev->shutdown)
5531 wptr = cik_get_ih_wptr(rdev);
5534 /* is somebody else already processing irqs? */
5535 if (atomic_xchg(&rdev->ih.lock, 1))
5538 rptr = rdev->ih.rptr;
5539 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5541 /* Order reading of wptr vs. reading of IH ring data */
5544 /* display interrupts */
5547 while (rptr != wptr) {
5548 /* wptr/rptr are in bytes! */
5549 ring_index = rptr / 4;
5550 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5551 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5552 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5555 case 1: /* D1 vblank/vline */
5557 case 0: /* D1 vblank */
5558 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5559 if (rdev->irq.crtc_vblank_int[0]) {
5560 drm_handle_vblank(rdev->ddev, 0);
5561 rdev->pm.vblank_sync = true;
5562 wake_up(&rdev->irq.vblank_queue);
5564 if (atomic_read(&rdev->irq.pflip[0]))
5565 radeon_crtc_handle_flip(rdev, 0);
5566 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5567 DRM_DEBUG("IH: D1 vblank\n");
5570 case 1: /* D1 vline */
5571 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5572 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5573 DRM_DEBUG("IH: D1 vline\n");
5577 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5581 case 2: /* D2 vblank/vline */
5583 case 0: /* D2 vblank */
5584 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5585 if (rdev->irq.crtc_vblank_int[1]) {
5586 drm_handle_vblank(rdev->ddev, 1);
5587 rdev->pm.vblank_sync = true;
5588 wake_up(&rdev->irq.vblank_queue);
5590 if (atomic_read(&rdev->irq.pflip[1]))
5591 radeon_crtc_handle_flip(rdev, 1);
5592 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5593 DRM_DEBUG("IH: D2 vblank\n");
5596 case 1: /* D2 vline */
5597 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5598 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5599 DRM_DEBUG("IH: D2 vline\n");
5603 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5607 case 3: /* D3 vblank/vline */
5609 case 0: /* D3 vblank */
5610 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5611 if (rdev->irq.crtc_vblank_int[2]) {
5612 drm_handle_vblank(rdev->ddev, 2);
5613 rdev->pm.vblank_sync = true;
5614 wake_up(&rdev->irq.vblank_queue);
5616 if (atomic_read(&rdev->irq.pflip[2]))
5617 radeon_crtc_handle_flip(rdev, 2);
5618 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5619 DRM_DEBUG("IH: D3 vblank\n");
5622 case 1: /* D3 vline */
5623 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5624 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5625 DRM_DEBUG("IH: D3 vline\n");
5629 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5633 case 4: /* D4 vblank/vline */
5635 case 0: /* D4 vblank */
5636 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5637 if (rdev->irq.crtc_vblank_int[3]) {
5638 drm_handle_vblank(rdev->ddev, 3);
5639 rdev->pm.vblank_sync = true;
5640 wake_up(&rdev->irq.vblank_queue);
5642 if (atomic_read(&rdev->irq.pflip[3]))
5643 radeon_crtc_handle_flip(rdev, 3);
5644 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5645 DRM_DEBUG("IH: D4 vblank\n");
5648 case 1: /* D4 vline */
5649 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5650 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5651 DRM_DEBUG("IH: D4 vline\n");
5655 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5659 case 5: /* D5 vblank/vline */
5661 case 0: /* D5 vblank */
5662 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5663 if (rdev->irq.crtc_vblank_int[4]) {
5664 drm_handle_vblank(rdev->ddev, 4);
5665 rdev->pm.vblank_sync = true;
5666 wake_up(&rdev->irq.vblank_queue);
5668 if (atomic_read(&rdev->irq.pflip[4]))
5669 radeon_crtc_handle_flip(rdev, 4);
5670 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5671 DRM_DEBUG("IH: D5 vblank\n");
5674 case 1: /* D5 vline */
5675 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5676 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5677 DRM_DEBUG("IH: D5 vline\n");
5681 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5685 case 6: /* D6 vblank/vline */
5687 case 0: /* D6 vblank */
5688 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5689 if (rdev->irq.crtc_vblank_int[5]) {
5690 drm_handle_vblank(rdev->ddev, 5);
5691 rdev->pm.vblank_sync = true;
5692 wake_up(&rdev->irq.vblank_queue);
5694 if (atomic_read(&rdev->irq.pflip[5]))
5695 radeon_crtc_handle_flip(rdev, 5);
5696 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5697 DRM_DEBUG("IH: D6 vblank\n");
5700 case 1: /* D6 vline */
5701 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5702 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5703 DRM_DEBUG("IH: D6 vline\n");
5707 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5711 case 42: /* HPD hotplug */
5714 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5715 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5716 queue_hotplug = true;
5717 DRM_DEBUG("IH: HPD1\n");
5721 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5722 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5723 queue_hotplug = true;
5724 DRM_DEBUG("IH: HPD2\n");
5728 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5729 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5730 queue_hotplug = true;
5731 DRM_DEBUG("IH: HPD3\n");
5735 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5736 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5737 queue_hotplug = true;
5738 DRM_DEBUG("IH: HPD4\n");
5742 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5743 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5744 queue_hotplug = true;
5745 DRM_DEBUG("IH: HPD5\n");
5749 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5750 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5751 queue_hotplug = true;
5752 DRM_DEBUG("IH: HPD6\n");
5756 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5762 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5763 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5764 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5765 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5766 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5768 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5770 cik_vm_decode_fault(rdev, status, addr, mc_client);
5771 /* reset addr and status */
5772 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5774 case 176: /* GFX RB CP_INT */
5775 case 177: /* GFX IB CP_INT */
5776 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5778 case 181: /* CP EOP event */
5779 DRM_DEBUG("IH: CP EOP\n");
5780 /* XXX check the bitfield order! */
5781 me_id = (ring_id & 0x60) >> 5;
5782 pipe_id = (ring_id & 0x18) >> 3;
5783 queue_id = (ring_id & 0x7) >> 0;
5786 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5790 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
5791 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5792 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
5793 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5797 case 184: /* CP Privileged reg access */
5798 DRM_ERROR("Illegal register access in command stream\n");
5799 /* XXX check the bitfield order! */
5800 me_id = (ring_id & 0x60) >> 5;
5801 pipe_id = (ring_id & 0x18) >> 3;
5802 queue_id = (ring_id & 0x7) >> 0;
5805 /* This results in a full GPU reset, but all we need to do is soft
5806 * reset the CP for gfx
5820 case 185: /* CP Privileged inst */
5821 DRM_ERROR("Illegal instruction in command stream\n");
5822 /* XXX check the bitfield order! */
5823 me_id = (ring_id & 0x60) >> 5;
5824 pipe_id = (ring_id & 0x18) >> 3;
5825 queue_id = (ring_id & 0x7) >> 0;
5828 /* This results in a full GPU reset, but all we need to do is soft
5829 * reset the CP for gfx
5843 case 224: /* SDMA trap event */
5844 /* XXX check the bitfield order! */
5845 me_id = (ring_id & 0x3) >> 0;
5846 queue_id = (ring_id & 0xc) >> 2;
5847 DRM_DEBUG("IH: SDMA trap\n");
5852 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5865 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5877 case 241: /* SDMA Privileged inst */
5878 case 247: /* SDMA Privileged inst */
5879 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5880 /* XXX check the bitfield order! */
5881 me_id = (ring_id & 0x3) >> 0;
5882 queue_id = (ring_id & 0xc) >> 2;
5916 case 233: /* GUI IDLE */
5917 DRM_DEBUG("IH: GUI idle\n");
5920 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5924 /* wptr/rptr are in bytes! */
5926 rptr &= rdev->ih.ptr_mask;
5929 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
5931 taskqueue_enqueue(rdev->tq, &rdev->reset_work);
5932 rdev->ih.rptr = rptr;
5933 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5934 atomic_set(&rdev->ih.lock, 0);
5936 /* make sure wptr hasn't changed while processing */
5937 wptr = cik_get_ih_wptr(rdev);
5945 * startup/shutdown callbacks
5948 * cik_startup - program the asic to a functional state
5950 * @rdev: radeon_device pointer
5952 * Programs the asic to a functional state (CIK).
5953 * Called by cik_init() and cik_resume().
5954 * Returns 0 for success, error for failure.
5956 static int cik_startup(struct radeon_device *rdev)
5958 struct radeon_ring *ring;
/* Program the memory controller before anything else touches vram. */
5961 cik_mc_program(rdev);
/* Load microcode if not already cached. APUs (IGP) check a smaller
 * firmware set here than dGPUs — presumably the dGPU condition also
 * includes the MC firmware (continuation line elided); TODO confirm. */
5963 if (rdev->flags & RADEON_IS_IGP) {
5964 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5965 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5966 r = cik_init_microcode(rdev);
5968 DRM_ERROR("Failed to load firmware!\n");
5973 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5974 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5976 r = cik_init_microcode(rdev);
5978 DRM_ERROR("Failed to load firmware!\n");
/* dGPU only: push the MC microcode into the memory controller. */
5983 r = ci_mc_load_microcode(rdev);
5985 DRM_ERROR("Failed to load MC firmware!\n");
5990 r = r600_vram_scratch_init(rdev);
/* Bring up the GART (GPU page tables) before allocating GPU objects. */
5994 r = cik_pcie_gart_enable(rdev);
5999 /* allocate rlc buffers */
6000 r = si_rlc_init(rdev);
6002 DRM_ERROR("Failed to init rlc BOs!\n");
6006 /* allocate wb buffer */
6007 r = radeon_wb_init(rdev);
6011 /* allocate mec buffers */
6012 r = cik_mec_init(rdev);
6014 DRM_ERROR("Failed to init MEC BOs!\n");
/* Start the fence driver on each ring: gfx, two compute (CP1/CP2),
 * two SDMA engines, and (optionally) UVD. */
6018 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6020 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6024 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6026 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6030 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6032 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6036 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6038 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6042 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6044 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
/* UVD is optional: on failure the UVD ring size is zeroed so the
 * ring-init step below is skipped rather than failing startup. */
6048 r = cik_uvd_resume(rdev);
6050 r = radeon_fence_driver_start_ring(rdev,
6051 R600_RING_TYPE_UVD_INDEX);
6053 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6056 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* Enable interrupts (IH ring) before starting the rings. */
6059 if (!rdev->irq.installed) {
6060 r = radeon_irq_kms_init(rdev);
6065 r = cik_irq_init(rdev);
6067 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6068 radeon_irq_kms_fini(rdev);
6073 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6074 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6075 CP_RB0_RPTR, CP_RB0_WPTR,
6076 0, 0xfffff, RADEON_CP_PACKET2);
6080 /* set up the compute queues */
6081 /* type-2 packets are deprecated on MEC, use type-3 instead */
6082 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6083 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6084 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6085 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6088 ring->me = 1; /* first MEC */
6089 ring->pipe = 0; /* first pipe */
6090 ring->queue = 0; /* first queue */
6091 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6093 /* type-2 packets are deprecated on MEC, use type-3 instead */
6094 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6095 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6096 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
/* NOTE(review): CP2 uses ptr mask 0xffffffff while CP1 above uses
 * 0xfffff — verify the asymmetry is intentional. */
6097 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6100 /* dGPU only have 1 MEC */
6101 ring->me = 1; /* first MEC */
6102 ring->pipe = 0; /* first pipe */
6103 ring->queue = 1; /* second queue */
6104 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
/* Two SDMA engines share the same register layout at different offsets. */
6106 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6107 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6108 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6109 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6110 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6114 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6115 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6116 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6117 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6118 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6122 r = cik_cp_resume(rdev);
6126 r = cik_sdma_resume(rdev);
/* ring_size == 0 means UVD resume failed above; skip UVD ring init. */
6130 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6131 if (ring->ring_size) {
6132 r = radeon_ring_init(rdev, ring, ring->ring_size,
6133 R600_WB_UVD_RPTR_OFFSET,
6134 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6135 0, 0xfffff, RADEON_CP_PACKET2);
6137 r = r600_uvd_init(rdev);
6139 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6142 r = radeon_ib_pool_init(rdev);
6144 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6148 r = radeon_vm_manager_init(rdev);
6150 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6158 * cik_resume - resume the asic to a functional state
6160 * @rdev: radeon_device pointer
6162 * Programs the asic to a functional state (CIK).
6164 * Returns 0 for success, error for failure.
6166 int cik_resume(struct radeon_device *rdev)
/* Re-run the atombios asic-init table to re-post the card on resume. */
6171 atom_asic_init(rdev->mode_info.atom_context);
6173 /* init golden registers */
6174 cik_init_golden_registers(rdev);
/* Mark acceleration usable before startup; cleared again on failure. */
6176 rdev->accel_working = true;
6177 r = cik_startup(rdev);
6179 DRM_ERROR("cik startup failed on resume\n");
6180 rdev->accel_working = false;
6189 * cik_suspend - suspend the asic
6191 * @rdev: radeon_device pointer
6193 * Bring the chip into a state suitable for suspend (CIK).
6194 * Called at suspend.
6195 * Returns 0 for success.
6197 int cik_suspend(struct radeon_device *rdev)
/* Teardown order matters: stop users of the hw (VM manager, CP, SDMA,
 * UVD) before disabling interrupts, writeback and finally the GART. */
6199 radeon_vm_manager_fini(rdev);
6200 cik_cp_enable(rdev, false);
6201 cik_sdma_enable(rdev, false);
6202 r600_uvd_stop(rdev);
6203 radeon_uvd_suspend(rdev);
6204 cik_irq_suspend(rdev);
6205 radeon_wb_disable(rdev);
6206 cik_pcie_gart_disable(rdev);
6210 /* Plan is to move initialization in that function and use
6211 * helper function so that radeon_device_init pretty much
6212 * do nothing more than calling asic specific function. This
6213 * should also allow to remove a bunch of callback function
6217 * cik_init - asic specific driver and hw init
6219 * @rdev: radeon_device pointer
6221 * Setup asic specific driver variables and program the hw
6222 * to a functional state (CIK).
6223 * Called at driver startup.
6224 * Returns 0 for success, errors for failure.
6226 int cik_init(struct radeon_device *rdev)
6228 struct radeon_ring *ring;
/* Read the video BIOS; CIK parts must carry an ATOMBIOS image. */
6232 if (!radeon_get_bios(rdev)) {
6233 if (ASIC_IS_AVIVO(rdev))
6236 /* Must be an ATOMBIOS */
6237 if (!rdev->is_atom_bios) {
6238 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6241 r = radeon_atombios_init(rdev);
6245 /* Post card if necessary */
6246 if (!radeon_card_posted(rdev)) {
6248 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6251 DRM_INFO("GPU not posted. posting now...\n");
6252 atom_asic_init(rdev->mode_info.atom_context);
6254 /* init golden registers */
6255 cik_init_golden_registers(rdev);
6256 /* Initialize scratch registers */
6257 cik_scratch_init(rdev);
6258 /* Initialize surface registers */
6259 radeon_surface_init(rdev);
6260 /* Initialize clocks */
6261 radeon_get_clock_info(rdev->ddev);
6264 r = radeon_fence_driver_init(rdev);
6268 /* initialize memory controller */
6269 r = cik_mc_init(rdev);
6272 /* Memory manager */
6273 r = radeon_bo_init(rdev);
/* Pre-size each ring; radeon_ring_init() in cik_startup() allocates
 * them. gfx + compute: 1MB each, SDMA: 256KB each, UVD: 4KB. */
6277 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6278 ring->ring_obj = NULL;
6279 r600_ring_init(rdev, ring, 1024 * 1024);
6281 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6282 ring->ring_obj = NULL;
6283 r600_ring_init(rdev, ring, 1024 * 1024);
/* Compute rings are driven through doorbells; reserve one per ring. */
6284 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6288 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6289 ring->ring_obj = NULL;
6290 r600_ring_init(rdev, ring, 1024 * 1024);
6291 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6295 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6296 ring->ring_obj = NULL;
6297 r600_ring_init(rdev, ring, 256 * 1024);
6299 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6300 ring->ring_obj = NULL;
6301 r600_ring_init(rdev, ring, 256 * 1024);
6303 r = radeon_uvd_init(rdev);
6305 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6306 ring->ring_obj = NULL;
6307 r600_ring_init(rdev, ring, 4096);
/* Interrupt handler (IH) ring: 64KB. */
6310 rdev->ih.ring_obj = NULL;
6311 r600_ih_ring_init(rdev, 64 * 1024);
6313 r = r600_pcie_gart_init(rdev);
6317 rdev->accel_working = true;
6318 r = cik_startup(rdev);
/* On startup failure, tear down everything cik_startup() may have
 * brought up and continue without acceleration (modesetting only). */
6320 dev_err(rdev->dev, "disabling GPU acceleration\n");
6322 cik_sdma_fini(rdev);
6326 radeon_wb_fini(rdev);
6327 radeon_ib_pool_fini(rdev);
6328 radeon_vm_manager_fini(rdev);
6329 radeon_irq_kms_fini(rdev);
6330 cik_pcie_gart_fini(rdev);
6331 rdev->accel_working = false;
6334 /* Don't start up if the MC ucode is missing.
6335 * The default clocks and voltages before the MC ucode
6336 * is loaded are not sufficient for advanced operations.
6338 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6339 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6347 * cik_fini - asic specific driver and hw fini
6349 * @rdev: radeon_device pointer
6351 * Tear down the asic specific driver variables and program the hw
6352 * to an idle state (CIK).
6353 * Called at driver unload.
6355 void cik_fini(struct radeon_device *rdev)
/* Full teardown at unload: engines first, then software state,
 * then memory/BIOS infrastructure — roughly the reverse of cik_init(). */
6358 cik_sdma_fini(rdev);
6362 radeon_wb_fini(rdev);
6363 radeon_vm_manager_fini(rdev);
6364 radeon_ib_pool_fini(rdev);
6365 radeon_irq_kms_fini(rdev);
6366 r600_uvd_stop(rdev);
6367 radeon_uvd_fini(rdev);
6368 cik_pcie_gart_fini(rdev);
6369 r600_vram_scratch_fini(rdev);
6370 radeon_gem_fini(rdev);
6371 radeon_fence_driver_fini(rdev);
6372 radeon_bo_fini(rdev);
6373 radeon_atombios_fini(rdev);
6378 /* display watermark setup */
6380 * dce8_line_buffer_adjust - Set up the line buffer
6382 * @rdev: radeon_device pointer
6383 * @radeon_crtc: the selected display controller
6384 * @mode: the current display mode on the selected display
6387 * Set up the line buffer allocation for
6388 * the selected display controller (CIK).
6389 * Returns the line buffer size in pixels.
6391 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6392 struct radeon_crtc *radeon_crtc,
6393 struct drm_display_mode *mode)
6399 * There are 6 line buffers, one for each display controller.
6400 * There are 3 partitions per LB. Select the number of partitions
6401 * to enable based on the display width. For display widths larger
6402 * than 4096, you need to use 2 display controllers and combine
6403 * them using the stereo blender.
/* Pick an LB partition config from crtc_hdisplay thresholds
 * (1920 / 2560 / 4096); assignments to tmp are elided here. */
6405 if (radeon_crtc->base.enabled && mode) {
6406 if (mode->crtc_hdisplay < 1920)
6408 else if (mode->crtc_hdisplay < 2560)
6410 else if (mode->crtc_hdisplay < 4096)
6413 DRM_DEBUG_KMS("Mode too big for LB!\n");
/* 0x6B0: LB memory size value written for this crtc — meaning of the
 * constant comes from the DCE8 register spec; TODO confirm units. */
6419 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6420 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6422 if (radeon_crtc->base.enabled && mode) {
6434 /* controller not enabled, so no lb used */
6439 * cik_get_number_of_dram_channels - get the number of dram channels
6441 * @rdev: radeon_device pointer
6443 * Look up the number of video ram channels (CIK).
6444 * Used for display watermark bandwidth calculations
6445 * Returns the number of dram channels
6447 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
/* Decode the channel count from the NOOFCHAN field of MC_SHARED_CHMAP;
 * the per-value switch cases are elided in this view. */
6449 u32 tmp = RREG32(MC_SHARED_CHMAP);
6451 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* dce8_wm_params - input parameters for the DCE8 display watermark
 * calculations. Filled in by dce8_program_watermarks() from the
 * current mode and clocks, then consumed by the dce8_*_bandwidth()
 * and dce8_latency_watermark() helpers below. */
6474 struct dce8_wm_params {
6475 u32 dram_channels; /* number of dram channels */
6476 u32 yclk; /* bandwidth per dram data pin in kHz */
6477 u32 sclk; /* engine clock in kHz */
6478 u32 disp_clk; /* display clock in kHz */
6479 u32 src_width; /* viewport width */
6480 u32 active_time; /* active display time in ns */
6481 u32 blank_time; /* blank time in ns */
6482 bool interlaced; /* mode is interlaced */
6483 fixed20_12 vsc; /* vertical scale ratio */
6484 u32 num_heads; /* number of active crtcs */
6485 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6486 u32 lb_size; /* line buffer allocated to pipe */
6487 u32 vtaps; /* vertical scaler taps */
6491 * dce8_dram_bandwidth - get the dram bandwidth
6493 * @wm: watermark calculation data
6495 * Calculate the raw dram bandwidth (CIK).
6496 * Used for display watermark bandwidth calculations
6497 * Returns the dram bandwidth in MBytes/s
6499 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6501 /* Calculate raw DRAM Bandwidth */
6502 fixed20_12 dram_efficiency; /* 0.7 */
6503 fixed20_12 yclk, dram_channels, bandwidth;
/* All math is fixed20_12 fixed point: yclk kHz -> MHz (divide by 1000),
 * 4 bytes per channel per clock, times a 0.7 (= 7/10) efficiency factor. */
6506 a.full = dfixed_const(1000);
6507 yclk.full = dfixed_const(wm->yclk);
6508 yclk.full = dfixed_div(yclk, a);
6509 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6510 a.full = dfixed_const(10);
6511 dram_efficiency.full = dfixed_const(7);
6512 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6513 bandwidth.full = dfixed_mul(dram_channels, yclk);
6514 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6516 return dfixed_trunc(bandwidth);
6520 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6522 * @wm: watermark calculation data
6524 * Calculate the dram bandwidth used for display (CIK).
6525 * Used for display watermark bandwidth calculations
6526 * Returns the dram bandwidth for display in MBytes/s
6528 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6530 /* Calculate DRAM Bandwidth and the part allocated to display. */
6531 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6532 fixed20_12 yclk, dram_channels, bandwidth;
/* Same shape as dce8_dram_bandwidth() but scaled by the display's
 * share of DRAM bandwidth, conservatively fixed at 0.3 (= 3/10). */
6535 a.full = dfixed_const(1000);
6536 yclk.full = dfixed_const(wm->yclk);
6537 yclk.full = dfixed_div(yclk, a);
6538 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6539 a.full = dfixed_const(10);
6540 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
6541 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6542 bandwidth.full = dfixed_mul(dram_channels, yclk);
6543 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6545 return dfixed_trunc(bandwidth);
6549 * dce8_data_return_bandwidth - get the data return bandwidth
6551 * @wm: watermark calculation data
6553 * Calculate the data return bandwidth used for display (CIK).
6554 * Used for display watermark bandwidth calculations
6555 * Returns the data return bandwidth in MBytes/s
6557 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6559 /* Calculate the display Data return Bandwidth */
6560 fixed20_12 return_efficiency; /* 0.8 */
6561 fixed20_12 sclk, bandwidth;
/* sclk kHz -> MHz, times 32 bytes per clock, times a 0.8 (= 8/10)
 * return efficiency factor. */
6564 a.full = dfixed_const(1000);
6565 sclk.full = dfixed_const(wm->sclk);
6566 sclk.full = dfixed_div(sclk, a);
6567 a.full = dfixed_const(10);
6568 return_efficiency.full = dfixed_const(8);
6569 return_efficiency.full = dfixed_div(return_efficiency, a);
6570 a.full = dfixed_const(32);
6571 bandwidth.full = dfixed_mul(a, sclk);
6572 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6574 return dfixed_trunc(bandwidth);
6578 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6580 * @wm: watermark calculation data
6582 * Calculate the dmif bandwidth used for display (CIK).
6583 * Used for display watermark bandwidth calculations
6584 * Returns the dmif bandwidth in MBytes/s
6586 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6588 /* Calculate the DMIF Request Bandwidth */
6589 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6590 fixed20_12 disp_clk, bandwidth;
/* disp_clk kHz -> MHz, times 32 bytes per request, times a 0.8
 * (= 8/10) request efficiency factor. */
6593 a.full = dfixed_const(1000);
6594 disp_clk.full = dfixed_const(wm->disp_clk);
6595 disp_clk.full = dfixed_div(disp_clk, a);
6596 a.full = dfixed_const(32);
6597 b.full = dfixed_mul(a, disp_clk);
6599 a.full = dfixed_const(10);
6600 disp_clk_request_efficiency.full = dfixed_const(8);
6601 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6603 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6605 return dfixed_trunc(bandwidth);
6609 * dce8_available_bandwidth - get the min available bandwidth
6611 * @wm: watermark calculation data
6613 * Calculate the min available bandwidth used for display (CIK).
6614 * Used for display watermark bandwidth calculations
6615 * Returns the min available bandwidth in MBytes/s
6617 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6619 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
/* The effective ceiling is whichever link is the bottleneck:
 * raw DRAM, engine data-return, or DMIF request bandwidth. */
6620 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6621 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6622 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6624 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6628 * dce8_average_bandwidth - get the average available bandwidth
6630 * @wm: watermark calculation data
6632 * Calculate the average available bandwidth used for display (CIK).
6633 * Used for display watermark bandwidth calculations
6634 * Returns the average available bandwidth in MBytes/s
6636 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6638 /* Calculate the display mode Average Bandwidth
6639 * DisplayMode should contain the source and destination dimensions,
6643 fixed20_12 line_time;
6644 fixed20_12 src_width;
6645 fixed20_12 bandwidth;
/* bytes per line (src_width * bpp * vertical scale) divided by the
 * line time (active + blank, ns -> us via the 1000 divisor). */
6648 a.full = dfixed_const(1000);
6649 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6650 line_time.full = dfixed_div(line_time, a);
6651 bpp.full = dfixed_const(wm->bytes_per_pixel);
6652 src_width.full = dfixed_const(wm->src_width);
6653 bandwidth.full = dfixed_mul(src_width, bpp);
6654 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6655 bandwidth.full = dfixed_div(bandwidth, line_time);
6657 return dfixed_trunc(bandwidth);
6661 * dce8_latency_watermark - get the latency watermark
6663 * @wm: watermark calculation data
6665 * Calculate the latency watermark (CIK).
6666 * Used for display watermark bandwidth calculations
6667 * Returns the latency watermark in ns
6669 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6671 /* First calculate the latency in ns */
6672 u32 mc_latency = 2000; /* 2000 ns. */
6673 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* 512-byte chunks x 8, and 128x4 cursor line pairs, converted to ns
 * by dividing through the available bandwidth (MB/s). */
6674 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6675 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6676 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6677 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6678 (wm->num_heads * cursor_line_pair_return_time);
6679 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6680 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6681 u32 tmp, dmif_size = 12288;
/* No active heads: nothing to hide latency for (early-out elided). */
6684 if (wm->num_heads == 0)
/* Downscaling (vsc > 1) or >=3-tap scaling or interlaced modes need
 * more source lines per destination line. */
6687 a.full = dfixed_const(2);
6688 b.full = dfixed_const(1);
6689 if ((wm->vsc.full > a.full) ||
6690 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6692 ((wm->vsc.full >= a.full) && wm->interlaced))
6693 max_src_lines_per_dst_line = 4;
6695 max_src_lines_per_dst_line = 2;
/* lb_fill_bw = min(per-head share of available bandwidth,
 * dmif-limited bandwidth, disp_clk * bytes_per_pixel). */
6697 a.full = dfixed_const(available_bandwidth);
6698 b.full = dfixed_const(wm->num_heads);
6699 a.full = dfixed_div(a, b);
6701 b.full = dfixed_const(mc_latency + 512);
6702 c.full = dfixed_const(wm->disp_clk);
6703 b.full = dfixed_div(b, c);
6705 c.full = dfixed_const(dmif_size);
6706 b.full = dfixed_div(c, b);
6708 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6710 b.full = dfixed_const(1000);
6711 c.full = dfixed_const(wm->disp_clk);
6712 b.full = dfixed_div(c, b);
6713 c.full = dfixed_const(wm->bytes_per_pixel);
6714 b.full = dfixed_mul(b, c);
6716 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* Time to fill one line's worth of LB at lb_fill_bw. */
6718 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6719 b.full = dfixed_const(1000);
6720 c.full = dfixed_const(lb_fill_bw);
6721 b.full = dfixed_div(c, b);
6722 a.full = dfixed_div(a, b);
6723 line_fill_time = dfixed_trunc(a);
/* If the line fills within the active period, latency alone covers it;
 * otherwise add the overrun. */
6725 if (line_fill_time < wm->active_time)
6728 return latency + (line_fill_time - wm->active_time);
6733 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6734 * average and available dram bandwidth
6736 * @wm: watermark calculation data
6738 * Check if the display average bandwidth fits in the display
6739 * dram bandwidth (CIK).
6740 * Used for display watermark bandwidth calculations
6741 * Returns true if the display fits, false if not.
6743 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
/* The display DRAM budget is shared equally across active heads. */
6745 if (dce8_average_bandwidth(wm) <=
6746 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6753 * dce8_average_bandwidth_vs_available_bandwidth - check
6754 * average and available bandwidth
6756 * @wm: watermark calculation data
6758 * Check if the display average bandwidth fits in the display
6759 * available bandwidth (CIK).
6760 * Used for display watermark bandwidth calculations
6761 * Returns true if the display fits, false if not.
6763 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
/* Same per-head split as above, against the min available bandwidth. */
6765 if (dce8_average_bandwidth(wm) <=
6766 (dce8_available_bandwidth(wm) / wm->num_heads))
6773 * dce8_check_latency_hiding - check latency hiding
6775 * @wm: watermark calculation data
6777 * Check latency hiding (CIK).
6778 * Used for display watermark bandwidth calculations
6779 * Returns true if the display fits, false if not.
6781 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6783 u32 lb_partitions = wm->lb_size / wm->src_width;
6784 u32 line_time = wm->active_time + wm->blank_time;
6785 u32 latency_tolerant_lines;
/* How many full lines the LB can buffer before underflow becomes a
 * risk: only 1 when downscaling or when the LB holds few partitions. */
6789 a.full = dfixed_const(1);
6790 if (wm->vsc.full > a.full)
6791 latency_tolerant_lines = 1;
6793 if (lb_partitions <= (wm->vtaps + 1))
6794 latency_tolerant_lines = 1;
6796 latency_tolerant_lines = 2;
6799 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
/* Fits if the computed latency watermark is within what the LB hides. */
6801 if (dce8_latency_watermark(wm) <= latency_hiding)
6808 * dce8_program_watermarks - program display watermarks
6810 * @rdev: radeon_device pointer
6811 * @radeon_crtc: the selected display controller
6812 * @lb_size: line buffer size
6813 * @num_heads: number of display controllers in use
6815 * Calculate and program the display watermarks for the
6816 * selected display controller (CIK).
6818 static void dce8_program_watermarks(struct radeon_device *rdev,
6819 struct radeon_crtc *radeon_crtc,
6820 u32 lb_size, u32 num_heads)
6822 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6823 struct dce8_wm_params wm;
6826 u32 latency_watermark_a = 0, latency_watermark_b = 0;
/* Build the wm parameter set from the current mode and clocks. */
6829 if (radeon_crtc->base.enabled && num_heads && mode) {
6830 pixel_period = 1000000 / (u32)mode->clock;
6831 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6833 wm.yclk = rdev->pm.current_mclk * 10;
6834 wm.sclk = rdev->pm.current_sclk * 10;
6835 wm.disp_clk = mode->clock;
6836 wm.src_width = mode->crtc_hdisplay;
6837 wm.active_time = mode->crtc_hdisplay * pixel_period;
6838 wm.blank_time = line_time - wm.active_time;
6839 wm.interlaced = false;
6840 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6841 wm.interlaced = true;
6842 wm.vsc = radeon_crtc->vsc;
6844 if (radeon_crtc->rmx_type != RMX_OFF)
6846 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6847 wm.lb_size = lb_size;
6848 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6849 wm.num_heads = num_heads;
6851 /* set for high clocks */
6852 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6853 /* set for low clocks */
6854 /* wm.yclk = low clk; wm.sclk = low clk */
/* NOTE(review): wm is not actually re-filled with low clocks before
 * computing watermark B here — A and B come from the same inputs. */
6855 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6857 /* possibly force display priority to high */
6858 /* should really do this at mode validation time... */
6859 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6860 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6861 !dce8_check_latency_hiding(&wm) ||
6862 (rdev->disp_priority == 2)) {
6863 DRM_DEBUG_KMS("force priority to high\n");
/* Program watermark A (mask select 1), then B (mask select 2), then
 * restore the original watermark mask selection. */
6868 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6870 tmp &= ~LATENCY_WATERMARK_MASK(3);
6871 tmp |= LATENCY_WATERMARK_MASK(1);
6872 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6873 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6874 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6875 LATENCY_HIGH_WATERMARK(line_time)));
6877 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6878 tmp &= ~LATENCY_WATERMARK_MASK(3);
6879 tmp |= LATENCY_WATERMARK_MASK(2);
6880 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6881 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6882 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6883 LATENCY_HIGH_WATERMARK(line_time)));
6884 /* restore original selection */
6885 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6889 * dce8_bandwidth_update - program display watermarks
6891 * @rdev: radeon_device pointer
6893 * Calculate and program the display watermarks and line
6894 * buffer allocation (CIK).
6896 void dce8_bandwidth_update(struct radeon_device *rdev)
6898 struct drm_display_mode *mode = NULL;
6899 u32 num_heads = 0, lb_size;
6902 radeon_update_display_priority(rdev);
/* First pass: count the enabled crtcs (heads). */
6904 for (i = 0; i < rdev->num_crtc; i++) {
6905 if (rdev->mode_info.crtcs[i]->base.enabled)
/* Second pass: size the line buffer and program watermarks per crtc. */
6908 for (i = 0; i < rdev->num_crtc; i++) {
6909 mode = &rdev->mode_info.crtcs[i]->base.mode;
6910 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6911 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6916 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6918 * @rdev: radeon_device pointer
6920 * Fetches a GPU clock counter snapshot (CIK).
6921 * Returns the 64 bit clock counter snapshot.
6923 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
/* Latch the 64-bit counter via RLC_CAPTURE_GPU_CLOCK_COUNT, then read
 * the LSB/MSB halves; the lock keeps the capture+read pair atomic with
 * respect to other callers. */
6927 spin_lock(&rdev->gpu_clock_mutex);
6928 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6929 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6930 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6931 spin_unlock(&rdev->gpu_clock_mutex);
/* cik_set_uvd_clock - program one UVD clock (VCLK or DCLK).
 * @clock: target frequency to request from the atombios divider table
 * @cntl_reg/@status_reg: SMC control/status registers for the clock;
 * the DCLK_* bit names are reused for both clocks via these params.
 * Polls up to 100 iterations for the status bit. */
6935 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6936 u32 cntl_reg, u32 status_reg)
6939 struct atom_clock_dividers dividers;
6942 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6943 clock, false, ÷rs);
/* Install the post divider from the atombios result. */
6947 tmp = RREG32_SMC(cntl_reg);
6948 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6949 tmp |= dividers.post_divider;
6950 WREG32_SMC(cntl_reg, tmp);
6952 for (i = 0; i < 100; i++) {
6953 if (RREG32_SMC(status_reg) & DCLK_STATUS)
/* cik_set_uvd_clocks - program both UVD clocks (VCLK then DCLK) via
 * cik_set_uvd_clock(); returns the first failing helper's error. */
6963 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6967 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6971 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6975 int cik_uvd_resume(struct radeon_device *rdev)
6981 r = radeon_uvd_resume(rdev);
6985 /* program the VCPU memory controller bits 0-27 */
6986 addr = rdev->uvd.gpu_addr >> 3;
6987 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3;
6988 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6989 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6992 size = RADEON_UVD_STACK_SIZE >> 3;
6993 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6994 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6997 size = RADEON_UVD_HEAP_SIZE >> 3;
6998 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6999 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7002 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7003 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7006 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7007 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));