2 * Copyright 2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Alex Deucher <alexander.deucher@amd.com>
25 * __FBSDID("$FreeBSD: src/sys/dev/drm/r600_blit.c,v 1.5 2009/10/30 18:08:46 rnoland Exp $");
28 #include "dev/drm/drmP.h"
29 #include "dev/drm/drm.h"
30 #include "dev/drm/radeon_drm.h"
31 #include "dev/drm/radeon_drv.h"
/*
 * State and shader tables used by the blit code.  Only the declarations are
 * visible in this listing; the initializer contents are not shown.
 */
/* Emitted dword by dword to the ring by set_default_state() in the branch
 * that is not taken for CHIP_RV770 and later. */
33 static u32 r6xx_default_state[] =
/* Emitted by set_default_state() when the chip family is >= CHIP_RV770. */
557 static u32 r7xx_default_state[] =
1068 /* same for r6xx/r7xx */
/* set_shaders() walks sizeof(r6xx_vs) / 4 dwords of this table. */
1069 static u32 r6xx_vs[] =
/* set_shaders() walks sizeof(r6xx_ps) / 4 dwords of this table. */
1085 static u32 r6xx_ps[] =
/*
 * Values emitted by draw_auto(): the primitive type written to
 * R600_VGT_PRIMITIVE_TYPE, the INDEX_TYPE payload and a DRAW_INDEX_AUTO
 * payload dword.
 */
1097 #define DI_PT_RECTLIST 0x11
1098 #define DI_INDEX_SIZE_16_BIT 0x0
1099 #define DI_SRC_SEL_AUTO_INDEX 0x2
/* Texture format code passed to set_tex_resource() by r600_blit_copy() and
 * r600_blit_swap(). */
1103 #define FMT_8_8_8_8 0x1a
/* Colour-buffer format codes passed to set_render_target(). */
1105 #define COLOR_5_6_5 8
1106 #define COLOR_8_8_8_8 0x1a
/*
 * Flag bits used as the sync_type argument of cp_set_surface_sync(), which
 * emits them as the first payload dword of a SURFACE_SYNC packet.  The code
 * visible in this file passes TC_ACTION_ENA, VC_ACTION_ENA and SH_ACTION_ENA
 * on their own, and CB_ACTION_ENA OR-ed with CB0_DEST_BASE_ENA.
 */
1108 #define R600_CB0_DEST_BASE_ENA (1 << 6)
1109 #define R600_TC_ACTION_ENA (1 << 23)
1110 #define R600_VC_ACTION_ENA (1 << 24)
1111 #define R600_CB_ACTION_ENA (1 << 25)
1112 #define R600_DB_ACTION_ENA (1 << 26)
1113 #define R600_SH_ACTION_ENA (1 << 27)
1114 #define R600_SMX_ACTION_ENA (1 << 28)
/*
 * Colour buffer 0 registers.  set_render_target() writes each of them with a
 * SET_CONTEXT_REG packet, addressing the register as
 * (reg - R600_SET_CONTEXT_REG_OFFSET) >> 2.
 */
1116 #define R600_CB_COLOR0_SIZE 0x28060
1117 #define R600_CB_COLOR0_VIEW 0x28080
1118 #define R600_CB_COLOR0_INFO 0x280a0
1119 #define R600_CB_COLOR0_TILE 0x280c0
1120 #define R600_CB_COLOR0_FRAG 0x280e0
1121 #define R600_CB_COLOR0_MASK 0x28100
/* Vertex-shader (VS) and pixel-shader (PS) program registers written by
 * set_shaders(). */
1123 #define R600_SQ_PGM_START_VS 0x28858
1124 #define R600_SQ_PGM_RESOURCES_VS 0x28868
1125 #define R600_SQ_PGM_CF_OFFSET_VS 0x288d0
1126 #define R600_SQ_PGM_START_PS 0x28840
1127 #define R600_SQ_PGM_RESOURCES_PS 0x28850
1128 #define R600_SQ_PGM_EXPORTS_PS 0x28854
1129 #define R600_SQ_PGM_CF_OFFSET_PS 0x288cc
/* Written by draw_auto() through a SET_CONFIG_REG packet. */
1131 #define R600_VGT_PRIMITIVE_TYPE 0x8958
/* First register of each scissor pair; set_scissors() writes two consecutive
 * dwords starting at each one. */
1133 #define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030
1134 #define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240
1135 #define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204
/* Resource type codes.  set_vtx_resource() and set_tex_resource() shift
 * VALID_BUFFER and VALID_TEXTURE respectively to bit 30 of a SET_RESOURCE
 * payload dword. */
1137 #define R600_SQ_TEX_VTX_INVALID_TEXTURE 0x0
1138 #define R600_SQ_TEX_VTX_INVALID_BUFFER 0x1
1139 #define R600_SQ_TEX_VTX_VALID_TEXTURE 0x2
1140 #define R600_SQ_TEX_VTX_VALID_BUFFER 0x3
/*
 * Start (OFFSET) and end (END) of the register address ranges.  Register
 * writes in this file are encoded as (register - <range>_OFFSET) >> 2, i.e. a
 * dword index from the start of the range; the code visible here only uses
 * the CONFIG and CONTEXT ranges.
 */
1142 /* packet 3 type offsets */
1143 #define R600_SET_CONFIG_REG_OFFSET 0x00008000
1144 #define R600_SET_CONFIG_REG_END 0x0000ac00
1145 #define R600_SET_CONTEXT_REG_OFFSET 0x00028000
1146 #define R600_SET_CONTEXT_REG_END 0x00029000
1147 #define R600_SET_ALU_CONST_OFFSET 0x00030000
1148 #define R600_SET_ALU_CONST_END 0x00032000
/* From here on each range starts exactly where the previous one ends. */
1149 #define R600_SET_RESOURCE_OFFSET 0x00038000
1150 #define R600_SET_RESOURCE_END 0x0003c000
1151 #define R600_SET_SAMPLER_OFFSET 0x0003c000
1152 #define R600_SET_SAMPLER_END 0x0003cff0
1153 #define R600_SET_CTL_CONST_OFFSET 0x0003cff0
1154 #define R600_SET_CTL_CONST_END 0x0003e200
1155 #define R600_SET_LOOP_CONST_OFFSET 0x0003e200
1156 #define R600_SET_LOOP_CONST_END 0x0003e380
1157 #define R600_SET_BOOL_CONST_OFFSET 0x0003e380
1158 #define R600_SET_BOOL_CONST_END 0x00040000
/*
 * Opcode values passed as the first argument of CP_PACKET3().  Every value
 * has a zero low byte.  The code visible in this file emits INDEX_TYPE,
 * DRAW_INDEX_AUTO, NUM_INSTANCES, SURFACE_SYNC, EVENT_WRITE, SET_CONFIG_REG,
 * SET_CONTEXT_REG, SET_RESOURCE and SURFACE_BASE_UPDATE; the remaining
 * opcodes are not referenced in the visible lines.
 */
1160 /* Packet 3 types */
1161 #define R600_IT_INDIRECT_BUFFER_END 0x00001700
1162 #define R600_IT_SET_PREDICATION 0x00002000
1163 #define R600_IT_REG_RMW 0x00002100
1164 #define R600_IT_COND_EXEC 0x00002200
1165 #define R600_IT_PRED_EXEC 0x00002300
1166 #define R600_IT_START_3D_CMDBUF 0x00002400
1167 #define R600_IT_DRAW_INDEX_2 0x00002700
1168 #define R600_IT_CONTEXT_CONTROL 0x00002800
1169 #define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900
1170 #define R600_IT_INDEX_TYPE 0x00002A00
1171 #define R600_IT_DRAW_INDEX 0x00002B00
1172 #define R600_IT_DRAW_INDEX_AUTO 0x00002D00
1173 #define R600_IT_DRAW_INDEX_IMMD 0x00002E00
1174 #define R600_IT_NUM_INSTANCES 0x00002F00
1175 #define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400
1176 #define R600_IT_INDIRECT_BUFFER_MP 0x00003800
1177 #define R600_IT_MEM_SEMAPHORE 0x00003900
1178 #define R600_IT_MPEG_INDEX 0x00003A00
1179 #define R600_IT_WAIT_REG_MEM 0x00003C00
1180 #define R600_IT_MEM_WRITE 0x00003D00
/* Listed out of numeric order (0x3200 after 0x3D00). */
1181 #define R600_IT_INDIRECT_BUFFER 0x00003200
1182 #define R600_IT_CP_INTERRUPT 0x00004000
1183 #define R600_IT_SURFACE_SYNC 0x00004300
1184 #define R600_IT_ME_INITIALIZE 0x00004400
1185 #define R600_IT_COND_WRITE 0x00004500
1186 #define R600_IT_EVENT_WRITE 0x00004600
1187 #define R600_IT_EVENT_WRITE_EOP 0x00004700
1188 #define R600_IT_ONE_REG_WRITE 0x00005700
1189 #define R600_IT_SET_CONFIG_REG 0x00006800
1190 #define R600_IT_SET_CONTEXT_REG 0x00006900
1191 #define R600_IT_SET_ALU_CONST 0x00006A00
1192 #define R600_IT_SET_BOOL_CONST 0x00006B00
1193 #define R600_IT_SET_LOOP_CONST 0x00006C00
1194 #define R600_IT_SET_RESOURCE 0x00006D00
1195 #define R600_IT_SET_SAMPLER 0x00006E00
1196 #define R600_IT_SET_CTL_CONST 0x00006F00
1197 #define R600_IT_SURFACE_BASE_UPDATE 0x00007300
/*
 * set_render_target - program colour buffer 0 as the blit destination.
 *
 * dev_priv: driver private data; dev_priv->flags selects the chip family.
 * format:   colour format code, placed at bit 2 of the CB_COLOR0_INFO value.
 * w, h:     width and height; (w / 8) - 1 becomes the pitch field and
 *           ((w * h) / 64) - 1 the slice field of CB_COLOR0_SIZE.
 * gpu_addr: destination address, written to CB_COLOR0_BASE shifted right by 8.
 *
 * NOTE(review): the embedded line numbers have gaps, so the local
 * declarations, any adjustment of h, the ring begin/advance calls and the
 * values written to VIEW/TILE/FRAG/MASK are not visible in this listing.
 */
1200 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
/* Format at bit 2, plus bit 27 set unconditionally. */
1211 cb_color_info = ((format << 2) | (1 << 27));
1212 pitch = (w / 8) - 1;
1213 slice = ((w * h) / 64) - 1;
/* Families strictly between CHIP_R600 and CHIP_RV770 follow the base-address
 * write with a SURFACE_BASE_UPDATE packet. */
1215 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
1216 ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
1218 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1219 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1220 OUT_RING(gpu_addr >> 8);
1221 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
/* Same base-address write without the SURFACE_BASE_UPDATE packet; presumably
 * the else branch (the `else` line is not shown). */
1225 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1226 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1227 OUT_RING(gpu_addr >> 8);
/* CB_COLOR0_SIZE: pitch from bit 0, slice from bit 10. */
1230 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1231 OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1232 OUT_RING((pitch << 0) | (slice << 10));
1234 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1235 OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1238 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1239 OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1240 OUT_RING(cb_color_info);
1242 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1243 OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1246 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1247 OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1250 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1251 OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
/*
 * cp_set_surface_sync - emit a SURFACE_SYNC packet for a memory range.
 *
 * dev_priv:  driver private data.
 * sync_type: R600_*_ACTION_ENA / R600_CB0_DEST_BASE_ENA flag bits, emitted
 *            unchanged as the first payload dword.
 * size:      range size; 0xffffffff is passed through as-is, any other value
 *            is rounded up to a count of 256-byte units.
 * mc_addr:   range start address, emitted shifted right by 8.
 *
 * NOTE(review): local declarations and ring begin/advance calls fall in the
 * gaps of this listing.
 */
1258 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
1259 u32 sync_type, u32 size, u64 mc_addr)
1265 if (size == 0xffffffff)
1266 cp_coher_size = 0xffffffff;
/* Otherwise round up to whole 256-byte units (the `else` line is not shown). */
1268 cp_coher_size = ((size + 255) >> 8);
1271 OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
1272 OUT_RING(sync_type);
1273 OUT_RING(cp_coher_size);
1274 OUT_RING((mc_addr >> 8));
1275 OUT_RING(10); /* poll interval */
/*
 * set_shaders - place the blit vertex and pixel shaders at the start of the
 * blit vertex buffer and point the shader program registers at them.
 *
 * dev: DRM device; dev->dev_private holds the radeon private data and
 *      dev->agp_buffer_map->handle is the CPU mapping of the buffers.
 *
 * The vertex shader occupies offset 0 of dev_priv->blit_vb and the pixel
 * shader offset 256; blit_vb->used is set to 512 afterwards.
 *
 * NOTE(review): the loop bodies, several register values and the ring
 * begin/advance calls are not visible in this listing.
 */
1280 set_shaders(struct drm_device *dev)
1282 drm_radeon_private_t *dev_priv = dev->dev_private;
1286 uint32_t sq_pgm_resources;
/* CPU pointers to the two shader slots inside the blit buffer. */
1291 vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
1292 ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
/* One iteration per dword of each shader table; presumably the (unshown)
 * loop bodies copy the tables to vs/ps. */
1294 shader_size = sizeof(r6xx_vs) / 4;
1295 for (i= 0; i < shader_size; i++)
1297 shader_size = sizeof(r6xx_ps) / 4;
1298 for (i= 0; i < shader_size; i++)
/* Mark the first 512 bytes of the buffer as consumed by the shaders. */
1301 dev_priv->blit_vb->used = 512;
/* GPU-side address of the same buffer. */
1303 gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
1305 /* setup shader regs */
1306 sq_pgm_resources = (1 << 0);
/* Vertex shader: start address in 256-byte units, then resources. */
1310 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1311 OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1312 OUT_RING(gpu_addr >> 8);
1314 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1315 OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1316 OUT_RING(sq_pgm_resources);
1318 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1319 OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
/* Pixel shader: starts 256 bytes after the vertex shader; its resources
 * value additionally has bit 28 set. */
1323 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1324 OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1325 OUT_RING((gpu_addr + 256) >> 8);
1327 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1328 OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1329 OUT_RING(sq_pgm_resources | (1 << 28));
1331 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1332 OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1335 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1336 OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
/* Surface sync with SH_ACTION_ENA over the 512 bytes holding both shaders. */
1340 cp_set_surface_sync(dev_priv,
1341 R600_SH_ACTION_ENA, 512, gpu_addr);
/*
 * set_vtx_resource - describe the vertex buffer at gpu_addr to the GPU with a
 * SET_RESOURCE packet and issue a surface sync over its 48 bytes.
 *
 * dev_priv: driver private data; dev_priv->flags selects the chip family.
 * gpu_addr: GPU address of the vertex data.
 *
 * NOTE(review): several payload dwords of the packet and the ring
 * begin/advance calls are not visible in this listing.
 */
1345 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
1347 uint32_t sq_vtx_constant_word2;
/* Address bits 32-39 in the low byte; the constant 16 from bit 8 upward is
 * presumably the per-vertex stride in bytes -- TODO confirm. */
1351 sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
1354 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
/* Low 32 bits of the address. */
1356 OUT_RING(gpu_addr & 0xffffffff);
1358 OUT_RING(sq_vtx_constant_word2);
1362 OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
/* These five families get the sync with TC_ACTION_ENA ... */
1365 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1366 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1367 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1368 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
1369 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
1370 cp_set_surface_sync(dev_priv,
1371 R600_TC_ACTION_ENA, 48, gpu_addr);
/* ... and this call uses VC_ACTION_ENA instead; presumably the else branch
 * (the `else` line is not shown). */
1373 cp_set_surface_sync(dev_priv,
1374 R600_VC_ACTION_ENA, 48, gpu_addr);
/*
 * set_tex_resource - describe the blit source as a texture with a
 * SET_RESOURCE packet.
 *
 * dev_priv: driver private data.
 * format:   texture format code, placed at bit 26 of resource word 1.
 * w, h:     width and height; h - 1 goes into the low bits of word 1
 *           (the use of w falls in lines not shown here).
 * pitch:    row pitch; (pitch >> 3) - 1 goes into word 0 from bit 8.
 * gpu_addr: source address, emitted twice shifted right by 8.
 *
 * NOTE(review): the continuation lines of word 0 and word 4, some payload
 * dwords and the ring begin/advance calls are not visible in this listing.
 */
1378 set_tex_resource(drm_radeon_private_t *dev_priv,
1379 int format, int w, int h, int pitch, u64 gpu_addr)
1381 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
1388 sq_tex_resource_word0 = (1 << 0);
1389 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
1392 sq_tex_resource_word1 = (format << 26);
1393 sq_tex_resource_word1 |= ((h - 1) << 0);
1395 sq_tex_resource_word4 = ((1 << 14) |
1402 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
1404 OUT_RING(sq_tex_resource_word0);
1405 OUT_RING(sq_tex_resource_word1);
/* The same shifted address is emitted for two consecutive dwords. */
1406 OUT_RING(gpu_addr >> 8);
1407 OUT_RING(gpu_addr >> 8);
1408 OUT_RING(sq_tex_resource_word4);
1410 OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
/*
 * set_scissors - write the same rectangle to the screen, generic and window
 * scissor register pairs.
 *
 * dev_priv: driver private data.
 * x1, y1:   first corner, packed as x1 | (y1 << 16) into the *_TL register.
 * x2, y2:   second corner, packed the same way into the following register.
 *
 * Each packet writes two consecutive context registers.  The generic and
 * window variants additionally set bit 31 of the first dword.
 *
 * NOTE(review): ring begin/advance calls are not visible in this listing.
 */
1416 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
1422 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
1423 OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1424 OUT_RING((x1 << 0) | (y1 << 16));
1425 OUT_RING((x2 << 0) | (y2 << 16));
1427 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
1428 OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1429 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
1430 OUT_RING((x2 << 0) | (y2 << 16));
1432 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
1433 OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1434 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
1435 OUT_RING((x2 << 0) | (y2 << 16));
/*
 * draw_auto - emit the packets that draw one auto-indexed rectangle list.
 *
 * dev_priv: driver private data.
 *
 * Sets R600_VGT_PRIMITIVE_TYPE to DI_PT_RECTLIST, selects the 16-bit index
 * type, then emits NUM_INSTANCES and DRAW_INDEX_AUTO.
 *
 * NOTE(review): the NUM_INSTANCES payload, the first DRAW_INDEX_AUTO payload
 * dword and the ring begin/advance calls are not visible in this listing.
 */
1440 draw_auto(drm_radeon_private_t *dev_priv)
1446 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
1447 OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
1448 OUT_RING(DI_PT_RECTLIST);
1450 OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
1451 OUT_RING(DI_INDEX_SIZE_16_BIT);
1453 OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
1456 OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
1458 OUT_RING(DI_SRC_SEL_AUTO_INDEX);
/*
 * set_default_state - emit the default state table for the chip generation,
 * followed by a cache flush event and the SQ resource configuration.
 *
 * dev_priv: driver private data; dev_priv->flags selects the chip family.
 *
 * The function picks per-family thread and stack-entry counts in a switch,
 * packs them into the SQ register values and writes six consecutive config
 * registers starting at R600_SQ_CONFIG.
 *
 * NOTE(review): the case labels, the GPR and GS/ES thread counts, part of
 * the sq_config setup and the ring advance call fall in gaps of this
 * listing.
 */
1465 set_default_state(drm_radeon_private_t *dev_priv)
1467 int default_state_dw, i;
1468 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
1469 u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
1470 int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
1471 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
1472 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
/* Per-family resource split.  The case labels are not shown; each run of six
 * assignments below appears to belong to one case. */
1475 switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
1482 num_ps_threads = 136;
1483 num_vs_threads = 48;
1486 num_ps_stack_entries = 128;
1487 num_vs_stack_entries = 128;
1488 num_gs_stack_entries = 0;
1489 num_es_stack_entries = 0;
1498 num_ps_threads = 144;
1499 num_vs_threads = 40;
1502 num_ps_stack_entries = 40;
1503 num_vs_stack_entries = 40;
1504 num_gs_stack_entries = 32;
1505 num_es_stack_entries = 16;
1517 num_ps_threads = 136;
1518 num_vs_threads = 48;
1521 num_ps_stack_entries = 40;
1522 num_vs_stack_entries = 40;
1523 num_gs_stack_entries = 32;
1524 num_es_stack_entries = 16;
1532 num_ps_threads = 136;
1533 num_vs_threads = 48;
1536 num_ps_stack_entries = 40;
1537 num_vs_stack_entries = 40;
1538 num_gs_stack_entries = 32;
1539 num_es_stack_entries = 16;
1547 num_ps_threads = 188;
1548 num_vs_threads = 60;
1551 num_ps_stack_entries = 256;
1552 num_vs_stack_entries = 256;
1553 num_gs_stack_entries = 0;
1554 num_es_stack_entries = 0;
1563 num_ps_threads = 188;
1564 num_vs_threads = 60;
1567 num_ps_stack_entries = 128;
1568 num_vs_stack_entries = 128;
1569 num_gs_stack_entries = 0;
1570 num_es_stack_entries = 0;
1578 num_ps_threads = 144;
1579 num_vs_threads = 48;
1582 num_ps_stack_entries = 128;
1583 num_vs_stack_entries = 128;
1584 num_gs_stack_entries = 0;
1585 num_es_stack_entries = 0;
/* Same five families that set_vtx_resource() treats specially.  The
 * statement executed for them is not shown; R600_VC_ENABLE below is
 * presumably the else branch -- TODO confirm. */
1589 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1590 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1591 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1592 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
1593 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
1596 sq_config = R600_VC_ENABLE;
/* Further flags OR-ed into sq_config (the tail of the list is not shown). */
1598 sq_config |= (R600_DX9_CONSTS |
1599 R600_ALU_INST_PREFER_VECTOR |
/* Pack the chosen counts into the SQ resource-management register values. */
1605 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
1606 R600_NUM_VS_GPRS(num_vs_gprs) |
1607 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
1608 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
1609 R600_NUM_ES_GPRS(num_es_gprs));
1610 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
1611 R600_NUM_VS_THREADS(num_vs_threads) |
1612 R600_NUM_GS_THREADS(num_gs_threads) |
1613 R600_NUM_ES_THREADS(num_es_threads));
1614 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
1615 R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
1616 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
1617 R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
/* Reserve ring space for the whole state table plus the 10 dwords emitted
 * after it (2 for the event write, 8 for the SET_CONFIG_REG packet), then
 * copy the table for this generation. */
1619 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
1620 default_state_dw = sizeof(r7xx_default_state) / 4;
1621 BEGIN_RING(default_state_dw + 10);
1622 for (i = 0; i < default_state_dw; i++)
1623 OUT_RING(r7xx_default_state[i]);
/* r6xx table; presumably the else branch (the `else` line is not shown). */
1625 default_state_dw = sizeof(r6xx_default_state) / 4;
1626 BEGIN_RING(default_state_dw + 10);
1627 for (i = 0; i < default_state_dw; i++)
1628 OUT_RING(r6xx_default_state[i]);
1630 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
1631 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
/* Six consecutive config registers starting at R600_SQ_CONFIG. */
1633 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
1634 OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
1635 OUT_RING(sq_config);
1636 OUT_RING(sq_gpr_resource_mgmt_1);
1637 OUT_RING(sq_gpr_resource_mgmt_2);
1638 OUT_RING(sq_thread_resource_mgmt);
1639 OUT_RING(sq_stack_resource_mgmt_1);
1640 OUT_RING(sq_stack_resource_mgmt_2);
/*
 * i2f - build, using integer arithmetic only, a 32-bit pattern laid out as
 * an exponent biased by 127 at bit 23 and a 23-bit fraction whose leading 1
 * is implicit (the single-precision float layout).
 *
 * input: value to convert.  Only the low 14 bits (input & 0x3fff) are used;
 *        higher bits are silently ignored.
 *
 * The pattern is assembled in `result`: 0 when the masked input is zero,
 * otherwise the value normalised so that its top set bit lands on bit 23,
 * with the exponent decremented once per shift.  The starting exponent 140
 * is 127 + 13, matching bit 13 of the input sitting at bit 23 after the
 * initial shift by 10.
 *
 * NOTE(review): the mask 0x3fff keeps 14 bits, so the range handled is
 * 0..16383 (below 2^14); the "2^^15" in the comment further down looks off
 * by one -- confirm.  The braces, `else`/`break` lines and the return
 * statement are not visible in this listing.
 */
1644 static inline uint32_t i2f(uint32_t input)
1646 u32 result, i, exponent, fraction;
1648 if ((input & 0x3fff) == 0)
1649 result = 0; /* 0 is a special case */
1651 exponent = 140; /* exponent biased by 127; */
1652 fraction = (input & 0x3fff) << 10; /* cheat and only
1653 handle numbers below 2^^15 */
/* At most 14 shifts are needed because the masked input has 14 bits. */
1654 for (i = 0; i < 14; i++) {
1655 if (fraction & 0x800000)
1658 fraction = fraction << 1; /* keep
1659 shifting left until top bit = 1 */
1660 exponent = exponent -1;
1663 result = exponent << 23 | (fraction & 0x7fffff); /* mask
1664 off top bit; assumed 1 */
/*
 * r600_prepare_blit_copy - get the GPU ready for a series of blits.
 *
 * dev: DRM device; dev->dev_private holds the radeon private data.
 *
 * Takes a buffer from the freelist to serve as dev_priv->blit_vb, logs an
 * error if none is available, and emits the default state.
 *
 * NOTE(review): the return type, the error return path and any further
 * setup calls fall in gaps of this listing.
 */
1670 r600_prepare_blit_copy(struct drm_device *dev)
1672 drm_radeon_private_t *dev_priv = dev->dev_private;
1675 dev_priv->blit_vb = radeon_freelist_get(dev);
1676 if (!dev_priv->blit_vb) {
1677 DRM_ERROR("Unable to allocate vertex buffer for blit\n");
1681 set_default_state(dev_priv);
/*
 * r600_done_blit_copy - finish a series of blits.
 *
 * dev: DRM device; dev->dev_private holds the radeon private data.
 *
 * Emits a cache flush-and-invalidate event, writes R600_WAIT_UNTIL with the
 * 3D idle and idle-clean flags, then resets blit_vb->used to 0 and hands the
 * blit vertex buffer to radeon_cp_discard_buffer().
 *
 * NOTE(review): ring begin/advance calls and anything else between the
 * visible lines are not shown in this listing.
 */
1688 r600_done_blit_copy(struct drm_device *dev)
1690 drm_radeon_private_t *dev_priv = dev->dev_private;
1695 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
1696 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
1697 /* wait for 3D idle clean */
1698 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
1699 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
1700 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
1705 dev_priv->blit_vb->used = 0;
1706 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
/*
 * r600_blit_copy - copy size_bytes bytes from src_gpu_addr to dst_gpu_addr by
 * drawing textured rectangles.
 *
 * dev:          DRM device; dev->dev_private holds the radeon private data.
 * src_gpu_addr: GPU address of the source.
 * dst_gpu_addr: GPU address of the destination.
 * size_bytes:   number of bytes to copy (declared on a line not shown here).
 *
 * Two loops with the same structure are visible.  The first runs when the
 * size or either address is not a multiple of 4 and uses the 8-bit formats
 * (FMT_8 / COLOR_8).  The second uses the 32-bit formats with max_bytes of
 * 8192 * 4 and divides the x coordinates by 4; it is presumably the else
 * branch, though the `else` line is not shown.
 *
 * Each iteration sets up the source texture, destination render target,
 * scissors and vertex buffer, draws, syncs, and advances both addresses by
 * cur_size * h.
 *
 * NOTE(review): many lines fall in gaps of this listing, including the
 * locals, the first loop's max_bytes, the limits applied to h, most vertex
 * dwords, trailing call arguments and the early-return statements.
 */
1710 r600_blit_copy(struct drm_device *dev,
1711 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
1714 drm_radeon_private_t *dev_priv = dev->dev_private;
/* CPU pointer to the next free slot of the blit vertex buffer. */
1719 vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
1720 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
1721 DRM_DEBUG("src=0x%016llx, dst=0x%016llx, size=%d\n",
1722 (unsigned long long)src_gpu_addr,
1723 (unsigned long long)dst_gpu_addr, size_bytes);
/* Anything not 4-byte aligned takes the byte-granular path. */
1725 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
1728 while (size_bytes) {
1729 int cur_size = size_bytes;
/* Offsets of the addresses within their 256-byte block; the addresses
 * themselves are then rounded down to that 256-byte boundary. */
1730 int src_x = src_gpu_addr & 255;
1731 int dst_x = dst_gpu_addr & 255;
1733 src_gpu_addr = src_gpu_addr & ~255;
1734 dst_gpu_addr = dst_gpu_addr & ~255;
/* Both addresses 256-byte aligned: derive a row count from the remaining
 * size and use max_bytes per row (the lines limiting h are not shown). */
1736 if (!src_x && !dst_x) {
1737 h = (cur_size / max_bytes);
1743 cur_size = max_bytes;
/* Otherwise clamp the chunk so that offset + size stays within max_bytes on
 * both the source and the destination side. */
1745 if (cur_size > max_bytes)
1746 cur_size = max_bytes;
1747 if (cur_size > (max_bytes - dst_x))
1748 cur_size = (max_bytes - dst_x);
1749 if (cur_size > (max_bytes - src_x))
1750 cur_size = (max_bytes - src_x);
/* Each draw consumes 48 bytes of vertex data.  When the current buffer
 * cannot hold another 48 bytes, discard it and take a fresh one from the
 * freelist (the action taken when none is available is not shown). */
1753 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
1754 dev_priv->blit_vb->used = 0;
1755 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
1756 dev_priv->blit_vb = radeon_freelist_get(dev);
1757 if (!dev_priv->blit_vb)
1760 vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
1761 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
/* Vertex data; coordinates are stored as i2f() bit patterns.  Only two of
 * the vertex dwords are visible in this listing. */
1774 vb[8] = i2f(dst_x + cur_size);
1776 vb[10] = i2f(src_x + cur_size);
/* Source as an 8-bit texture, followed by a TC surface sync over it. */
1780 set_tex_resource(dev_priv, FMT_8,
1781 src_x + cur_size, h, src_x + cur_size,
1784 cp_set_surface_sync(dev_priv,
1785 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
/* Destination as an 8-bit render target. */
1788 set_render_target(dev_priv, COLOR_8,
1789 dst_x + cur_size, h,
1793 set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
1795 /* Vertex buffer setup */
1796 vb_addr = dev_priv->gart_buffers_offset +
1797 dev_priv->blit_vb->offset +
1798 dev_priv->blit_vb->used;
1799 set_vtx_resource(dev_priv, vb_addr);
1802 draw_auto(dev_priv);
/* Surface sync over the destination range after the draw. */
1804 cp_set_surface_sync(dev_priv,
1805 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
1806 cur_size * h, dst_gpu_addr);
/* 12 dwords of vertex data were consumed. */
1809 dev_priv->blit_vb->used += 12 * 4;
1811 src_gpu_addr += cur_size * h;
1812 dst_gpu_addr += cur_size * h;
1813 size_bytes -= cur_size * h;
/* 32-bit path: same loop, with sizes in bytes but x coordinates in 4-byte
 * texels. */
1816 max_bytes = 8192 * 4;
1818 while (size_bytes) {
1819 int cur_size = size_bytes;
1820 int src_x = (src_gpu_addr & 255);
1821 int dst_x = (dst_gpu_addr & 255);
1823 src_gpu_addr = src_gpu_addr & ~255;
1824 dst_gpu_addr = dst_gpu_addr & ~255;
1826 if (!src_x && !dst_x) {
1827 h = (cur_size / max_bytes);
1833 cur_size = max_bytes;
1835 if (cur_size > max_bytes)
1836 cur_size = max_bytes;
1837 if (cur_size > (max_bytes - dst_x))
1838 cur_size = (max_bytes - dst_x);
1839 if (cur_size > (max_bytes - src_x))
1840 cur_size = (max_bytes - src_x);
1843 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
1844 dev_priv->blit_vb->used = 0;
1845 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
1846 dev_priv->blit_vb = radeon_freelist_get(dev);
1847 if (!dev_priv->blit_vb)
1850 vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
1851 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
/* Vertex x coordinates are byte offsets divided by 4. */
1854 vb[0] = i2f(dst_x / 4);
1856 vb[2] = i2f(src_x / 4);
1859 vb[4] = i2f(dst_x / 4);
1861 vb[6] = i2f(src_x / 4);
1864 vb[8] = i2f((dst_x + cur_size) / 4);
1866 vb[10] = i2f((src_x + cur_size) / 4);
1870 set_tex_resource(dev_priv, FMT_8_8_8_8,
1871 (src_x + cur_size) / 4,
1872 h, (src_x + cur_size) / 4,
1875 cp_set_surface_sync(dev_priv,
1876 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
1879 set_render_target(dev_priv, COLOR_8_8_8_8,
1880 (dst_x + cur_size) / 4, h,
/* NOTE(review): the right edge here is dst_x + (cur_size / 4), whereas the
 * vertex above uses (dst_x + cur_size) / 4 -- confirm the parenthesisation
 * is intended. */
1884 set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
1886 /* Vertex buffer setup */
1887 vb_addr = dev_priv->gart_buffers_offset +
1888 dev_priv->blit_vb->offset +
1889 dev_priv->blit_vb->used;
1890 set_vtx_resource(dev_priv, vb_addr);
1893 draw_auto(dev_priv);
1895 cp_set_surface_sync(dev_priv,
1896 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
1897 cur_size * h, dst_gpu_addr);
1900 dev_priv->blit_vb->used += 12 * 4;
1902 src_gpu_addr += cur_size * h;
1903 dst_gpu_addr += cur_size * h;
1904 size_bytes -= cur_size * h;
/*
 * r600_blit_swap - blit one rectangle from a source surface to a destination
 * surface with a single draw.
 *
 * dev:                  DRM device; dev->dev_private holds the radeon
 *                       private data.
 * src_gpu_addr:         GPU address of the source surface.
 * dst_gpu_addr:         GPU address of the destination surface.
 * sx, sy:               source rectangle origin.
 * dx, dy:               destination rectangle origin; used as the first
 *                       scissor corner.
 * w, h:                 rectangle size (their use falls in lines not shown).
 * src_pitch, dst_pitch: surface pitches; divided by cpp before being passed
 *                       to the texture and render-target setup, and
 *                       multiplied by sy2 / dy2 to size the surface syncs.
 * cpp:                  divisor applied to the pitches; presumably bytes per
 *                       pixel, which would also select the format pair below
 *                       -- TODO confirm, the selecting statement is not shown.
 *
 * NOTE(review): the computation of sx2/sy2/dx2/dy2, the vertex data, the
 * format selection statement, some call arguments and the action taken when
 * no vertex buffer is available are not visible in this listing.
 */
1910 r600_blit_swap(struct drm_device *dev,
1911 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
1912 int sx, int sy, int dx, int dy,
1913 int w, int h, int src_pitch, int dst_pitch, int cpp)
1915 drm_radeon_private_t *dev_priv = dev->dev_private;
1916 int cb_format, tex_format;
1917 int sx2, sy2, dx2, dy2;
/* The draw needs 48 bytes of vertex data; replace the vertex buffer when the
 * current one cannot hold them. */
1921 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
1922 dev_priv->blit_vb->used = 0;
1923 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
1924 dev_priv->blit_vb = radeon_freelist_get(dev);
1925 if (!dev_priv->blit_vb)
/* CPU pointer to the next free slot of the blit vertex buffer. */
1929 vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
1930 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
/* Matching colour-buffer / texture format pairs: 8_8_8_8, 5_6_5 and 8. */
1954 cb_format = COLOR_8_8_8_8;
1955 tex_format = FMT_8_8_8_8;
1958 cb_format = COLOR_5_6_5;
1959 tex_format = FMT_5_6_5;
1962 cb_format = COLOR_8;
/* Source texture, followed by a TC surface sync over src_pitch * sy2 bytes. */
1968 set_tex_resource(dev_priv, tex_format,
1970 sy2, src_pitch / cpp,
1973 cp_set_surface_sync(dev_priv,
1974 R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
/* Destination render target and scissor rectangle. */
1977 set_render_target(dev_priv, cb_format,
1978 dst_pitch / cpp, dy2,
1982 set_scissors(dev_priv, dx, dy, dx2, dy2);
1984 /* Vertex buffer setup */
1985 vb_addr = dev_priv->gart_buffers_offset +
1986 dev_priv->blit_vb->offset +
1987 dev_priv->blit_vb->used;
1988 set_vtx_resource(dev_priv, vb_addr);
1991 draw_auto(dev_priv);
/* Surface sync over dst_pitch * dy2 bytes of the destination after the draw. */
1993 cp_set_surface_sync(dev_priv,
1994 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
1995 dst_pitch * dy2, dst_gpu_addr);
/* 12 dwords of vertex data were consumed. */
1997 dev_priv->blit_vb->used += 12 * 4;