2 * Copyright © 2008 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
26 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_tiling.c,v 1.1 2012/05/22 11:07:44 kib Exp $
29 #include <sys/sfbuf.h>
31 #include <dev/drm/drmP.h>
32 #include <dev/drm/drm.h>
36 /** @file i915_gem_tiling.c
38 * Support for managing tiling state of buffer objects.
40 * The idea behind tiling is to increase cache hit rates by rearranging
41 * pixel data so that a group of pixel accesses are in the same cacheline.
42 * Performance improvement from doing this on the back/depth buffer are on
45 * Intel architectures make this somewhat more complicated, though, by
46 * adjustments made to addressing of data when the memory is in interleaved
47 * mode (matched pairs of DIMMS) to improve memory bandwidth.
48 * For interleaved memory, the CPU sends every sequential 64 bytes
49 * to an alternate memory channel so it can get the bandwidth from both.
51 * The GPU also rearranges its accesses for increased bandwidth to interleaved
52 * memory, and it matches what the CPU does for non-tiled. However, when tiled
53 * it does it a little differently, since one walks addresses not just in the
54 * X direction but also Y. So, along with alternating channels when bit
55 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
56 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
57 * are common to both the 915 and 965-class hardware.
59 * The CPU also sometimes XORs in higher bits as well, to improve
60 * bandwidth doing strided access like we do so frequently in graphics. This
61 * is called "Channel XOR Randomization" in the MCH documentation. The result
62 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
65 * All of this bit 6 XORing has an effect on our memory management,
66 * as we need to make sure that the 3d driver can correctly address object
69 * If we don't have interleaved memory, all tiling is safe and no swizzling is
72 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
74 * individual pages in it will have different bit 17 addresses, resulting in
75 * each 64 bytes being swapped with its neighbor!
77 * Otherwise, if interleaved, we have to tell the 3d driver what the address
78 * swizzling it needs to do is, since it's writing with the CPU to the pages
79 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
80 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
81 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
82 * to match what the GPU expects.
86 * Detects bit 6 swizzling of address lookup between IGD access and CPU
87 * access through main memory.
90 i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
92 drm_i915_private_t *dev_priv = dev->dev_private;
93 uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
94 uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
96 if (INTEL_INFO(dev)->gen >= 6) {
97 uint32_t dimm_c0, dimm_c1;
98 dimm_c0 = I915_READ(MAD_DIMM_C0);
99 dimm_c1 = I915_READ(MAD_DIMM_C1);
100 dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
101 dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
102 /* Enable swizzling when the channels are populated with
103 * identically sized dimms. We don't need to check the 3rd
104 * channel because no cpu with gpu attached ships in that
105 * configuration. Also, swizzling only makes sense for 2
106 * channels anyway. */
107 if (dimm_c0 == dimm_c1) {
108 swizzle_x = I915_BIT_6_SWIZZLE_9_10;
109 swizzle_y = I915_BIT_6_SWIZZLE_9;
111 swizzle_x = I915_BIT_6_SWIZZLE_NONE;
112 swizzle_y = I915_BIT_6_SWIZZLE_NONE;
114 } else if (IS_GEN5(dev)) {
115 /* On Ironlake whatever DRAM config, GPU always do
116 * same swizzling setup.
118 swizzle_x = I915_BIT_6_SWIZZLE_9_10;
119 swizzle_y = I915_BIT_6_SWIZZLE_9;
120 } else if (IS_GEN2(dev)) {
121 /* As far as we know, the 865 doesn't have these bit 6
124 swizzle_x = I915_BIT_6_SWIZZLE_NONE;
125 swizzle_y = I915_BIT_6_SWIZZLE_NONE;
126 } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
129 /* On 9xx chipsets, channel interleave by the CPU is
130 * determined by DCC. For single-channel, neither the CPU
131 * nor the GPU do swizzling. For dual channel interleaved,
132 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
133 * 9 for Y tiled. The CPU's interleave is independent, and
134 * can be based on either bit 11 (haven't seen this yet) or
137 dcc = I915_READ(DCC);
138 switch (dcc & DCC_ADDRESSING_MODE_MASK) {
139 case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
140 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
141 swizzle_x = I915_BIT_6_SWIZZLE_NONE;
142 swizzle_y = I915_BIT_6_SWIZZLE_NONE;
144 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
145 if (dcc & DCC_CHANNEL_XOR_DISABLE) {
146 /* This is the base swizzling by the GPU for
149 swizzle_x = I915_BIT_6_SWIZZLE_9_10;
150 swizzle_y = I915_BIT_6_SWIZZLE_9;
151 } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
152 /* Bit 11 swizzling by the CPU in addition. */
153 swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
154 swizzle_y = I915_BIT_6_SWIZZLE_9_11;
156 /* Bit 17 swizzling by the CPU in addition. */
157 swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
158 swizzle_y = I915_BIT_6_SWIZZLE_9_17;
162 if (dcc == 0xffffffff) {
163 DRM_ERROR("Couldn't read from MCHBAR. "
164 "Disabling tiling.\n");
165 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
166 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
169 /* The 965, G33, and newer, have a very flexible memory
170 * configuration. It will enable dual-channel mode
171 * (interleaving) on as much memory as it can, and the GPU
172 * will additionally sometimes enable different bit 6
173 * swizzling for tiled objects from the CPU.
175 * Here's what I found on the G965:
176 * slot fill memory size swizzling
177 * 0A 0B 1A 1B 1-ch 2-ch
179 * 512 0 512 0 16 1008 X
180 * 512 0 0 512 16 1008 X
181 * 0 512 0 512 16 1008 X
182 * 1024 1024 1024 0 2048 1024 O
184 * We could probably detect this based on either the DRB
185 * matching, which was the case for the swizzling required in
186 * the table above, or from the 1-ch value being less than
187 * the minimum size of a rank.
189 if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
190 swizzle_x = I915_BIT_6_SWIZZLE_NONE;
191 swizzle_y = I915_BIT_6_SWIZZLE_NONE;
193 swizzle_x = I915_BIT_6_SWIZZLE_9_10;
194 swizzle_y = I915_BIT_6_SWIZZLE_9;
198 dev_priv->mm.bit_6_swizzle_x = swizzle_x;
199 dev_priv->mm.bit_6_swizzle_y = swizzle_y;
202 /* Check pitch constriants for all chips & tiling formats */
204 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
208 /* Linear is always fine */
209 if (tiling_mode == I915_TILING_NONE)
213 (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
218 /* check maximum stride & object size */
219 if (INTEL_INFO(dev)->gen >= 4) {
220 /* i965 stores the end address of the gtt mapping in the fence
221 * reg, so dont bother to check the size */
222 if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
229 if (size > I830_FENCE_MAX_SIZE_VAL << 20)
232 if (size > I830_FENCE_MAX_SIZE_VAL << 19)
237 /* 965+ just needs multiples of tile width */
238 if (INTEL_INFO(dev)->gen >= 4) {
239 if (stride & (tile_width - 1))
244 /* Pre-965 needs power of two tile widths */
245 if (stride < tile_width)
248 if (stride & (stride - 1))
254 /* Is the current GTT allocation valid for the change in tiling? */
256 i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
260 if (tiling_mode == I915_TILING_NONE)
263 if (INTEL_INFO(obj->base.dev)->gen >= 4)
266 if (INTEL_INFO(obj->base.dev)->gen == 3) {
267 if (obj->gtt_offset & ~I915_FENCE_START_MASK)
270 if (obj->gtt_offset & ~I830_FENCE_START_MASK)
275 * Previous chips need to be aligned to the size of the smallest
276 * fence register that can contain the object.
278 if (INTEL_INFO(obj->base.dev)->gen == 3)
283 while (size < obj->base.size)
286 if (obj->gtt_space->size != size)
289 if (obj->gtt_offset & (size - 1))
296 * Sets the tiling mode of an object, returning the required swizzling of
297 * bit 6 of addresses in the object.
300 i915_gem_set_tiling(struct drm_device *dev, void *data,
301 struct drm_file *file)
303 struct drm_i915_gem_set_tiling *args = data;
304 drm_i915_private_t *dev_priv = dev->dev_private;
305 struct drm_i915_gem_object *obj;
309 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
310 if (&obj->base == NULL)
313 if (!i915_tiling_ok(dev,
314 args->stride, obj->base.size, args->tiling_mode)) {
315 drm_gem_object_unreference(&obj->base);
319 if (obj->pin_count) {
320 drm_gem_object_unreference(&obj->base);
324 if (args->tiling_mode == I915_TILING_NONE) {
325 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
328 if (args->tiling_mode == I915_TILING_X)
329 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
331 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
333 /* Hide bit 17 swizzling from the user. This prevents old Mesa
334 * from aborting the application on sw fallbacks to bit 17,
335 * and we use the pread/pwrite bit17 paths to swizzle for it.
336 * If there was a user that was relying on the swizzle
337 * information for drm_intel_bo_map()ed reads/writes this would
338 * break it, but we don't have any of those.
340 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
341 args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
342 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
343 args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
345 /* If we can't handle the swizzling, make it untiled. */
346 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
347 args->tiling_mode = I915_TILING_NONE;
348 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
353 if (args->tiling_mode != obj->tiling_mode ||
354 args->stride != obj->stride) {
355 /* We need to rebind the object if its current allocation
356 * no longer meets the alignment restrictions for its new
357 * tiling mode. Otherwise we can just leave it alone, but
358 * need to ensure that any fence register is cleared.
360 i915_gem_release_mmap(obj);
362 obj->map_and_fenceable = obj->gtt_space == NULL ||
363 (obj->gtt_offset + obj->base.size <=
364 dev_priv->mm.gtt_mappable_end &&
365 i915_gem_object_fence_ok(obj, args->tiling_mode));
367 /* Rebind if we need a change of alignment */
368 if (!obj->map_and_fenceable) {
369 uint32_t unfenced_alignment =
370 i915_gem_get_unfenced_gtt_alignment(dev,
371 obj->base.size, args->tiling_mode);
372 if (obj->gtt_offset & (unfenced_alignment - 1))
373 ret = i915_gem_object_unbind(obj);
376 obj->tiling_changed = true;
377 obj->tiling_mode = args->tiling_mode;
378 obj->stride = args->stride;
381 /* we have to maintain this existing ABI... */
382 args->stride = obj->stride;
383 args->tiling_mode = obj->tiling_mode;
384 drm_gem_object_unreference(&obj->base);
390 * Returns the current tiling mode and required bit 6 swizzling for the object.
393 i915_gem_get_tiling(struct drm_device *dev, void *data,
394 struct drm_file *file)
396 struct drm_i915_gem_get_tiling *args = data;
397 drm_i915_private_t *dev_priv = dev->dev_private;
398 struct drm_i915_gem_object *obj;
400 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
401 if (&obj->base == NULL)
404 args->tiling_mode = obj->tiling_mode;
405 switch (obj->tiling_mode) {
407 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
410 args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
412 case I915_TILING_NONE:
413 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
416 DRM_ERROR("unknown tiling mode\n");
419 /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
420 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
421 args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
422 if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
423 args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
425 drm_gem_object_unreference(&obj->base);
431 * Swap every 64 bytes of this page around, to account for it having a new
432 * bit 17 of its physical address and therefore being interpreted differently
436 i915_gem_swizzle_page(vm_page_t m)
444 sf = sf_buf_alloc(m);
445 vaddr = (char *)sf_buf_kva(sf);
447 for (i = 0; i < PAGE_SIZE; i += 128) {
448 memcpy(temp, &vaddr[i], 64);
449 memcpy(&vaddr[i], &vaddr[i + 64], 64);
450 memcpy(&vaddr[i + 64], temp, 64);
457 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
459 int page_count = obj->base.size >> PAGE_SHIFT;
462 if (obj->bit_17 == NULL)
465 for (i = 0; i < page_count; i++) {
466 char new_bit_17 = VM_PAGE_TO_PHYS(obj->pages[i]) >> 17;
467 if ((new_bit_17 & 0x1) !=
468 (test_bit(i, obj->bit_17) != 0)) {
469 i915_gem_swizzle_page(obj->pages[i]);
470 vm_page_dirty(obj->pages[i]);
476 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
478 int page_count = obj->base.size >> PAGE_SHIFT;
481 if (obj->bit_17 == NULL) {
482 obj->bit_17 = kmalloc(BITS_TO_LONGS(page_count) *
483 sizeof(long), DRM_I915_GEM, M_WAITOK);
486 /* XXXKIB: review locking, atomics might be not needed there */
487 for (i = 0; i < page_count; i++) {
488 if (VM_PAGE_TO_PHYS(obj->pages[i]) & (1 << 17))
489 set_bit(i, obj->bit_17);
491 clear_bit(i, obj->bit_17);