r300_cmdbuf.c revision 1450
1450N/A/*
1450N/A * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
1450N/A */
1450N/A
1450N/A/*
1450N/A * r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
1450N/A *
1450N/A * Copyright (C) The Weather Channel, Inc. 2002.
1450N/A * Copyright (C) 2004 Nicolai Haehnle.
1450N/A * All Rights Reserved.
1450N/A *
1450N/A * The Weather Channel (TM) funded Tungsten Graphics to develop the
1450N/A * initial release of the Radeon 8500 driver under the XFree86 license.
1450N/A * This notice must be preserved.
1450N/A *
1450N/A * Permission is hereby granted, free of charge, to any person obtaining a
1450N/A * copy of this software and associated documentation files (the "Software"),
1450N/A * to deal in the Software without restriction, including without limitation
1450N/A * the rights to use, copy, modify, merge, publish, distribute, sublicense,
1450N/A * and/or sell copies of the Software, and to permit persons to whom the
1450N/A * Software is furnished to do so, subject to the following conditions:
1450N/A *
1450N/A * The above copyright notice and this permission notice (including the next
1450N/A * paragraph) shall be included in all copies or substantial portions of the
1450N/A * Software.
1450N/A *
1450N/A * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1450N/A * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1450N/A * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1450N/A * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
1450N/A * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1450N/A * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1450N/A * DEALINGS IN THE SOFTWARE.
1450N/A *
1450N/A * Authors:
1450N/A * Nicolai Haehnle <prefect_@gmx.net>
1450N/A */
1450N/A
1450N/A#include "drm.h"
1450N/A#include "radeon_drm.h"
1450N/A#include "drmP.h"
1450N/A#include "radeon_drv.h"
1450N/A#include "r300_reg.h"
1450N/A
1450N/A#ifdef u
1450N/A#undef u
1450N/A#endif
1450N/A
1450N/A
/* Upper bound on the number of cliprects emitted by one r300_emit_cliprects. */
#define	R300_SIMULTANEOUS_CLIPRECTS	4

/*
 * R300_RE_CLIPRECT_CNTL register values.  The table is indexed by
 * (number of cliprects - 1).
 */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,		/* 1 cliprect */
	0xEEEE,		/* 2 cliprects */
	0xFEFE,		/* 3 cliprects */
	0xFFFE		/* 4 cliprects */
};
1450N/A
1450N/A/*
1450N/A * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
1450N/A * buffer, starting with index n.
1450N/A */
1450N/Astatic int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf, int n)
1450N/A{
1450N/A drm_clip_rect_t box;
1450N/A int nr;
1450N/A int i;
1450N/A RING_LOCALS;
1450N/A
1450N/A nr = cmdbuf->nbox - n;
1450N/A if (nr > R300_SIMULTANEOUS_CLIPRECTS)
1450N/A nr = R300_SIMULTANEOUS_CLIPRECTS;
1450N/A
1450N/A DRM_DEBUG("%i cliprects\n", nr);
1450N/A
1450N/A if (nr) {
1450N/A BEGIN_RING(6 + nr * 2);
1450N/A OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
1450N/A
1450N/A for (i = 0; i < nr; ++i) {
1450N/A if (DRM_COPY_FROM_USER_UNCHECKED
1450N/A (&box, &cmdbuf->boxes[n + i], sizeof (box))) {
1450N/A DRM_ERROR("copy cliprect faulted\n");
1450N/A return (EFAULT);
1450N/A }
1450N/A
1450N/A box.x1 =
1450N/A (box.x1 +
1450N/A R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
1450N/A box.y1 =
1450N/A (box.y1 +
1450N/A R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
1450N/A box.x2 =
1450N/A (box.x2 +
1450N/A R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
1450N/A box.y2 =
1450N/A (box.y2 +
1450N/A R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
1450N/A
1450N/A OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
1450N/A (box.y1 << R300_CLIPRECT_Y_SHIFT));
1450N/A OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
1450N/A (box.y2 << R300_CLIPRECT_Y_SHIFT));
1450N/A }
1450N/A
1450N/A OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
1450N/A
1450N/A /*
1450N/A * TODO/SECURITY: Force scissors to a safe value, otherwise
1450N/A * the client might be able to trample over memory.
1450N/A * The impact should be very limited, but I'd rather be safe
1450N/A * than sorry.
1450N/A */
1450N/A OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
1450N/A OUT_RING(0);
1450N/A OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
1450N/A ADVANCE_RING();
1450N/A } else {
1450N/A /*
1450N/A * Why we allow zero cliprect rendering:
1450N/A * There are some commands in a command buffer that must be
1450N/A * submitted even when there are no cliprects, e.g. DMA buffer
1450N/A * discard or state setting (though state setting could be
1450N/A * avoided by simulating a loss of context).
1450N/A *
1450N/A * Now since the cmdbuf interface is so chaotic right now (and
1450N/A * is bound to remain that way for a bit until things settle
1450N/A * down), it is basically impossible to filter out the commands
1450N/A * that are necessary and those that aren't.
1450N/A *
1450N/A * So I choose the safe way and don't do any filtering at all;
1450N/A * instead, I simply set up the engine so that all rendering
1450N/A * can't produce any fragments.
1450N/A */
1450N/A BEGIN_RING(2);
1450N/A OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
1450N/A ADVANCE_RING();
1450N/A }
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/Astatic u8 r300_reg_flags[0x10000 >> 2];
1450N/A
1450N/Avoid
1450N/Ar300_init_reg_flags(void)
1450N/A{
1450N/A int i;
1450N/A (void) memset(r300_reg_flags, 0, 0x10000 >> 2);
1450N/A#define ADD_RANGE_MARK(reg, count, mark) \
1450N/A for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++)\
1450N/A r300_reg_flags[i] |= (mark);
1450N/A
1450N/A#define MARK_SAFE 1
1450N/A#define MARK_CHECK_OFFSET 2
1450N/A
1450N/A#define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE)
1450N/A
1450N/A /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
1450N/A ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
1450N/A ADD_RANGE(0x2080, 1);
1450N/A ADD_RANGE(R300_SE_VTE_CNTL, 2);
1450N/A ADD_RANGE(0x2134, 2);
1450N/A ADD_RANGE(0x2140, 1);
1450N/A ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
1450N/A ADD_RANGE(0x21DC, 1);
1450N/A ADD_RANGE(0x221C, 1);
1450N/A ADD_RANGE(0x2220, 4);
1450N/A ADD_RANGE(0x2288, 1);
1450N/A ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
1450N/A ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
1450N/A ADD_RANGE(R300_GB_ENABLE, 1);
1450N/A ADD_RANGE(R300_GB_MSPOS0, 5);
1450N/A ADD_RANGE(R300_TX_CNTL, 1);
1450N/A ADD_RANGE(R300_TX_ENABLE, 1);
1450N/A ADD_RANGE(0x4200, 4);
1450N/A ADD_RANGE(0x4214, 1);
1450N/A ADD_RANGE(R300_RE_POINTSIZE, 1);
1450N/A ADD_RANGE(0x4230, 3);
1450N/A ADD_RANGE(R300_RE_LINE_CNT, 1);
1450N/A ADD_RANGE(0x4238, 1);
1450N/A ADD_RANGE(0x4260, 3);
1450N/A ADD_RANGE(0x4274, 4);
1450N/A ADD_RANGE(0x4288, 5);
1450N/A ADD_RANGE(0x42A0, 1);
1450N/A ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
1450N/A ADD_RANGE(0x42B4, 1);
1450N/A ADD_RANGE(R300_RE_CULL_CNTL, 1);
1450N/A ADD_RANGE(0x42C0, 2);
1450N/A ADD_RANGE(R300_RS_CNTL_0, 2);
1450N/A ADD_RANGE(R300_RS_INTERP_0, 8);
1450N/A ADD_RANGE(R300_RS_ROUTE_0, 8);
1450N/A ADD_RANGE(0x43A4, 2);
1450N/A ADD_RANGE(0x43E8, 1);
1450N/A ADD_RANGE(R300_PFS_CNTL_0, 3);
1450N/A ADD_RANGE(R300_PFS_NODE_0, 4);
1450N/A ADD_RANGE(R300_PFS_TEXI_0, 64);
1450N/A ADD_RANGE(0x46A4, 5);
1450N/A ADD_RANGE(R300_PFS_INSTR0_0, 64);
1450N/A ADD_RANGE(R300_PFS_INSTR1_0, 64);
1450N/A ADD_RANGE(R300_PFS_INSTR2_0, 64);
1450N/A ADD_RANGE(R300_PFS_INSTR3_0, 64);
1450N/A ADD_RANGE(0x4BC0, 1);
1450N/A ADD_RANGE(0x4BC8, 3);
1450N/A ADD_RANGE(R300_PP_ALPHA_TEST, 2);
1450N/A ADD_RANGE(0x4BD8, 1);
1450N/A ADD_RANGE(R300_PFS_PARAM_0_X, 64);
1450N/A ADD_RANGE(0x4E00, 1);
1450N/A ADD_RANGE(R300_RB3D_CBLEND, 2);
1450N/A ADD_RANGE(R300_RB3D_COLORMASK, 1);
1450N/A ADD_RANGE(0x4E10, 3);
1450N/A ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);
1450N/A /* check offset */
1450N/A ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
1450N/A ADD_RANGE(0x4E50, 9);
1450N/A ADD_RANGE(0x4E88, 1);
1450N/A ADD_RANGE(0x4EA0, 2);
1450N/A ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
1450N/A ADD_RANGE(0x4F10, 4);
1450N/A ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);
1450N/A /* check offset */
1450N/A ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
1450N/A ADD_RANGE(0x4F28, 1);
1450N/A ADD_RANGE(0x4F30, 2);
1450N/A ADD_RANGE(0x4F44, 1);
1450N/A ADD_RANGE(0x4F54, 1);
1450N/A
1450N/A ADD_RANGE(R300_TX_FILTER_0, 16);
1450N/A ADD_RANGE(R300_TX_FILTER1_0, 16);
1450N/A ADD_RANGE(R300_TX_SIZE_0, 16);
1450N/A ADD_RANGE(R300_TX_FORMAT_0, 16);
1450N/A ADD_RANGE(R300_TX_PITCH_0, 16);
1450N/A /* Texture offset is dangerous and needs more checking */
1450N/A ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
1450N/A ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
1450N/A ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
1450N/A
1450N/A /* Sporadic registers used as primitives are emitted */
1450N/A ADD_RANGE(0x4f18, 1);
1450N/A ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
1450N/A ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
1450N/A ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
1450N/A
1450N/A}
1450N/A
1450N/Astatic __inline__ int r300_check_range(unsigned reg, int count)
1450N/A{
1450N/A int i;
1450N/A if (reg & ~0xffff)
1450N/A return (-1);
1450N/A for (i = (reg >> 2); i < (reg >> 2) + count; i++)
1450N/A if (r300_reg_flags[i] != MARK_SAFE)
1450N/A return (1);
1450N/A return (0);
1450N/A}
1450N/A
1450N/Astatic inline int
1450N/Ar300_emit_carefully_checked_packet0(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
1450N/A{
1450N/A int reg;
1450N/A int sz;
1450N/A int i;
1450N/A int values[64];
1450N/A RING_LOCALS;
1450N/A
1450N/A sz = header.packet0.count;
1450N/A reg = (header.packet0.reghi << 8) | header.packet0.reglo;
1450N/A
1450N/A if ((sz > 64) || (sz < 0)) {
1450N/A DRM_ERROR("Cannot emit more than 64 values at a time "
1450N/A "(reg=%04x sz=%d)\n", reg, sz);
1450N/A return (EINVAL);
1450N/A }
1450N/A for (i = 0; i < sz; i++) {
1450N/A values[i] = ((int *)(uintptr_t)cmdbuf->buf)[i];
1450N/A switch (r300_reg_flags[(reg >> 2) + i]) {
1450N/A case MARK_SAFE:
1450N/A break;
1450N/A case MARK_CHECK_OFFSET:
1450N/A if (!radeon_check_offset(dev_priv, (u32) values[i])) {
1450N/A DRM_ERROR("Offset failed range check "
1450N/A "(reg=%04x sz=%d)\n", reg, sz);
1450N/A return (EINVAL);
1450N/A }
1450N/A break;
1450N/A default:
1450N/A DRM_ERROR("Register %04x failed check as flag=%02x\n",
1450N/A reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
1450N/A return (EINVAL);
1450N/A }
1450N/A }
1450N/A
1450N/A BEGIN_RING(1 + sz);
1450N/A OUT_RING(CP_PACKET0(reg, sz - 1));
1450N/A OUT_RING_TABLE(values, sz);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += sz * 4;
1450N/A cmdbuf->bufsz -= sz * 4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A/*
1450N/A * Emits a packet0 setting arbitrary registers.
1450N/A * Called by r300_do_cp_cmdbuf.
1450N/A *
1450N/A * Note that checks are performed on contents and addresses of the registers
1450N/A */
1450N/Astatic __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf,
1450N/A drm_r300_cmd_header_t header)
1450N/A{
1450N/A int reg;
1450N/A int sz;
1450N/A RING_LOCALS;
1450N/A
1450N/A sz = header.packet0.count;
1450N/A reg = (header.packet0.reghi << 8) | header.packet0.reglo;
1450N/A
1450N/A if (!sz)
1450N/A return (0);
1450N/A
1450N/A if (sz * 4 > cmdbuf->bufsz)
1450N/A return (EINVAL);
1450N/A
1450N/A if (reg + sz * 4 >= 0x10000) {
1450N/A DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n",
1450N/A reg, sz);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A if (r300_check_range(reg, sz)) {
1450N/A /* go and check everything */
1450N/A return (r300_emit_carefully_checked_packet0(dev_priv,
1450N/A cmdbuf, header));
1450N/A }
1450N/A /*
1450N/A * the rest of the data is safe to emit, whatever the values
1450N/A * the user passed
1450N/A */
1450N/A
1450N/A BEGIN_RING(1 + sz);
1450N/A OUT_RING(CP_PACKET0(reg, sz - 1));
1450N/A OUT_RING_TABLE(cmdbuf->buf, sz);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += sz * 4;
1450N/A cmdbuf->bufsz -= sz * 4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A/*
1450N/A * Uploads user-supplied vertex program instructions or parameters onto
1450N/A * the graphics card.
1450N/A * Called by r300_do_cp_cmdbuf.
1450N/A */
1450N/Astatic inline int r300_emit_vpu(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
1450N/A{
1450N/A int sz;
1450N/A int addr;
1450N/A RING_LOCALS;
1450N/A
1450N/A sz = header.vpu.count;
1450N/A addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
1450N/A
1450N/A if (!sz)
1450N/A return (0);
1450N/A if (sz * 16 > cmdbuf->bufsz)
1450N/A return (EINVAL);
1450N/A
1450N/A BEGIN_RING(5 + sz * 4);
1450N/A /* Wait for VAP to come to senses.. */
1450N/A /*
1450N/A * there is no need to emit it multiple times, (only once before
1450N/A * VAP is programmed, but this optimization is for later
1450N/A */
1450N/A OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
1450N/A OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
1450N/A OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
1450N/A OUT_RING_TABLE(cmdbuf->buf, sz * 4);
1450N/A
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += sz * 16;
1450N/A cmdbuf->bufsz -= sz * 16;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A/*
1450N/A * Emit a clear packet from userspace.
1450N/A * Called by r300_emit_packet3.
1450N/A */
1450N/Astatic inline int r300_emit_clear(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf)
1450N/A{
1450N/A RING_LOCALS;
1450N/A
1450N/A if (8 * 4 > cmdbuf->bufsz)
1450N/A return (EINVAL);
1450N/A
1450N/A BEGIN_RING(10);
1450N/A OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
1450N/A OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
1450N/A (1 << R300_PRIM_NUM_VERTICES_SHIFT));
1450N/A OUT_RING_TABLE(cmdbuf->buf, 8);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += 8 * 4;
1450N/A cmdbuf->bufsz -= 8 * 4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/Astatic inline int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf, u32 header)
1450N/A{
1450N/A int count, i, k;
1450N/A#define MAX_ARRAY_PACKET 64
1450N/A u32 payload[MAX_ARRAY_PACKET];
1450N/A u32 narrays;
1450N/A RING_LOCALS;
1450N/A
1450N/A count = (header >> 16) & 0x3fff;
1450N/A
1450N/A if ((count + 1) > MAX_ARRAY_PACKET) {
1450N/A DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
1450N/A count);
1450N/A return (EINVAL);
1450N/A }
1450N/A (void) memset(payload, 0, MAX_ARRAY_PACKET * 4);
1450N/A (void) memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
1450N/A
1450N/A /* carefully check packet contents */
1450N/A
1450N/A narrays = payload[0];
1450N/A k = 0;
1450N/A i = 1;
1450N/A while ((k < narrays) && (i < (count + 1))) {
1450N/A i++; /* skip attribute field */
1450N/A if (!radeon_check_offset(dev_priv, payload[i])) {
1450N/A DRM_ERROR("Offset failed range check (k=%d i=%d) "
1450N/A "while processing 3D_LOAD_VBPNTR packet.\n",
1450N/A k, i);
1450N/A return (EINVAL);
1450N/A }
1450N/A k++;
1450N/A i++;
1450N/A if (k == narrays)
1450N/A break;
1450N/A /* have one more to process, they come in pairs */
1450N/A if (!radeon_check_offset(dev_priv, payload[i])) {
1450N/A DRM_ERROR("Offset failed range check (k=%d i=%d) "
1450N/A "while processing 3D_LOAD_VBPNTR packet.\n",
1450N/A k, i);
1450N/A return (EINVAL);
1450N/A }
1450N/A k++;
1450N/A i++;
1450N/A }
1450N/A /* do the counts match what we expect ? */
1450N/A if ((k != narrays) || (i != (count + 1))) {
1450N/A DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet "
1450N/A "(k=%d i=%d narrays=%d count+1=%d).\n",
1450N/A k, i, narrays, count + 1);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A /* all clear, output packet */
1450N/A
1450N/A BEGIN_RING(count + 2);
1450N/A OUT_RING(header);
1450N/A OUT_RING_TABLE(payload, count + 1);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += (count + 2) * 4;
1450N/A cmdbuf->bufsz -= (count + 2) * 4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/Astatic inline int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf)
1450N/A{
1450N/A u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
1450N/A int count, ret;
1450N/A RING_LOCALS;
1450N/A
1450N/A count = (cmd[0] >> 16) & 0x3fff;
1450N/A
1450N/A if (cmd[0] & 0x8000) {
1450N/A u32 offset;
1450N/A
1450N/A if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1450N/A RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
1450N/A offset = cmd[2] << 10;
1450N/A ret = !radeon_check_offset(dev_priv, offset);
1450N/A if (ret) {
1450N/A DRM_ERROR("Invalid bitblt first offset "
1450N/A "is %08X\n", offset);
1450N/A return (EINVAL);
1450N/A }
1450N/A }
1450N/A
1450N/A if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
1450N/A (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
1450N/A offset = cmd[3] << 10;
1450N/A ret = !radeon_check_offset(dev_priv, offset);
1450N/A if (ret) {
1450N/A DRM_ERROR("Invalid bitblt second offset "
1450N/A "is %08X\n", offset);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A }
1450N/A }
1450N/A
1450N/A BEGIN_RING(count+2);
1450N/A OUT_RING(cmd[0]);
1450N/A OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += (count+2)*4;
1450N/A cmdbuf->bufsz -= (count+2)*4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A
1450N/Astatic inline int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf)
1450N/A{
1450N/A u32 *cmd = (u32 *)(uintptr_t)cmdbuf->buf;
1450N/A int count, ret;
1450N/A RING_LOCALS;
1450N/A
1450N/A count = (cmd[0]>>16) & 0x3fff;
1450N/A
1450N/A if ((cmd[1] & 0x8000ffff) != 0x80000810) {
1450N/A DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
1450N/A return (EINVAL);
1450N/A }
1450N/A ret = !radeon_check_offset(dev_priv, cmd[2]);
1450N/A if (ret) {
1450N/A DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A BEGIN_RING(count+2);
1450N/A OUT_RING(cmd[0]);
1450N/A OUT_RING_TABLE(cmdbuf->buf + 4, count + 1);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += (count+2)*4;
1450N/A cmdbuf->bufsz -= (count+2)*4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A
1450N/Astatic __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf)
1450N/A{
1450N/A u32 header;
1450N/A int count;
1450N/A RING_LOCALS;
1450N/A
1450N/A if (4 > cmdbuf->bufsz)
1450N/A return (EINVAL);
1450N/A
1450N/A /*
1450N/A * Fixme !! This simply emits a packet without much checking.
1450N/A * We need to be smarter.
1450N/A */
1450N/A
1450N/A /* obtain first word - actual packet3 header */
1450N/A header = *(u32 *)(uintptr_t)cmdbuf->buf;
1450N/A
1450N/A /* Is it packet 3 ? */
1450N/A if ((header >> 30) != 0x3) {
1450N/A DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A count = (header >> 16) & 0x3fff;
1450N/A
1450N/A /* Check again now that we know how much data to expect */
1450N/A if ((count + 2) * 4 > cmdbuf->bufsz) {
1450N/A DRM_ERROR("Expected packet3 of length %d but have only "
1450N/A "%d bytes left\n", (count + 2) * 4, cmdbuf->bufsz);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A /* Is it a packet type we know about ? */
1450N/A switch (header & 0xff00) {
1450N/A case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
1450N/A return (r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header));
1450N/A
1450N/A case RADEON_CNTL_BITBLT_MULTI:
1450N/A return (r300_emit_bitblt_multi(dev_priv, cmdbuf));
1450N/A
1450N/A case RADEON_CP_INDX_BUFFER:
1450N/A // DRAW_INDX_2 without INDX_BUFFER seems to lock
1450N/A // up the GPU
1450N/A return (r300_emit_indx_buffer(dev_priv, cmdbuf));
1450N/A
1450N/A case RADEON_CP_3D_DRAW_IMMD_2:
1450N/A /* triggers drawing using in-packet vertex data */
1450N/A case RADEON_CP_3D_DRAW_VBUF_2:
1450N/A /* triggers drawing of vertex buffers setup elsewhere */
1450N/A case RADEON_CP_3D_DRAW_INDX_2:
1450N/A /* triggers drawing using indices to vertex buffer */
1450N/A case RADEON_WAIT_FOR_IDLE:
1450N/A case RADEON_CP_NOP:
1450N/A /* these packets are safe */
1450N/A break;
1450N/A default:
1450N/A DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A BEGIN_RING(count + 2);
1450N/A OUT_RING(header);
1450N/A OUT_RING_TABLE((cmdbuf->buf + 4), count + 1);
1450N/A ADVANCE_RING();
1450N/A
1450N/A cmdbuf->buf += (count + 2) * 4;
1450N/A cmdbuf->bufsz -= (count + 2) * 4;
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A/*
1450N/A * Emit a rendering packet3 from userspace.
1450N/A * Called by r300_do_cp_cmdbuf.
1450N/A */
1450N/Astatic __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf, drm_r300_cmd_header_t header)
1450N/A{
1450N/A int n;
1450N/A int ret;
1450N/A char *orig_buf = cmdbuf->buf;
1450N/A int orig_bufsz = cmdbuf->bufsz;
1450N/A
1450N/A /*
1450N/A * This is a do-while-loop so that we run the interior at least once,
1450N/A * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
1450N/A */
1450N/A n = 0;
1450N/A do {
1450N/A if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
1450N/A ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
1450N/A if (ret)
1450N/A return (ret);
1450N/A
1450N/A cmdbuf->buf = orig_buf;
1450N/A cmdbuf->bufsz = orig_bufsz;
1450N/A }
1450N/A
1450N/A switch (header.packet3.packet) {
1450N/A case R300_CMD_PACKET3_CLEAR:
1450N/A DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
1450N/A ret = r300_emit_clear(dev_priv, cmdbuf);
1450N/A if (ret) {
1450N/A DRM_ERROR("r300_emit_clear failed\n");
1450N/A return (ret);
1450N/A }
1450N/A break;
1450N/A
1450N/A case R300_CMD_PACKET3_RAW:
1450N/A DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
1450N/A ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
1450N/A if (ret) {
1450N/A DRM_ERROR("r300_emit_raw_packet3 failed\n");
1450N/A return (ret);
1450N/A }
1450N/A break;
1450N/A
1450N/A default:
1450N/A DRM_ERROR("bad packet3 type %i at %p\n",
1450N/A header.packet3.packet,
1450N/A (void *)(cmdbuf->buf - sizeof (header)));
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A n += R300_SIMULTANEOUS_CLIPRECTS;
1450N/A } while (n < cmdbuf->nbox);
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A/*
1450N/A * Some of the R300 chips seem to be extremely touchy about the two registers
1450N/A * that are configured in r300_pacify.
1450N/A * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
1450N/A * sends a command buffer that contains only state setting commands and a
1450N/A * vertex program/parameter upload sequence, this will eventually lead to a
1450N/A * lockup, unless the sequence is bracketed by calls to r300_pacify.
1450N/A * So we should take great care to *always* call r300_pacify before
1450N/A * *anything* 3D related, and again afterwards. This is what the
1450N/A * call bracket in r300_do_cp_cmdbuf is for.
1450N/A */
1450N/A
1450N/A/*
1450N/A * Emit the sequence to pacify R300.
1450N/A */
1450N/Astatic __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
1450N/A{
1450N/A RING_LOCALS;
1450N/A
1450N/A BEGIN_RING(6);
1450N/A OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
1450N/A OUT_RING(0xa);
1450N/A OUT_RING(CP_PACKET0(0x4f18, 0));
1450N/A OUT_RING(0x3);
1450N/A OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
1450N/A OUT_RING(0x0);
1450N/A ADVANCE_RING();
1450N/A}
1450N/A
1450N/A/*
1450N/A * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
1450N/A * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
1450N/A * be careful about how this function is called.
1450N/A */
1450N/Astatic void r300_discard_buffer(drm_device_t *dev, drm_buf_t *buf)
1450N/A{
1450N/A drm_radeon_private_t *dev_priv = dev->dev_private;
1450N/A drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1450N/A
1450N/A buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1450N/A buf->pending = 1;
1450N/A buf->used = 0;
1450N/A}
1450N/A
1450N/Astatic int r300_scratch(drm_radeon_private_t *dev_priv,
1450N/A drm_radeon_kcmd_buffer_t *cmdbuf,
1450N/A drm_r300_cmd_header_t header)
1450N/A{
1450N/A u32 *ref_age_base;
1450N/A u32 i, buf_idx, h_pending;
1450N/A RING_LOCALS;
1450N/A
1450N/A if (cmdbuf->bufsz < sizeof (uint64_t) +
1450N/A header.scratch.n_bufs * sizeof (buf_idx)) {
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A if (header.scratch.reg >= 5) {
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A dev_priv->scratch_ages[header.scratch.reg] ++;
1450N/A
1450N/A ref_age_base = (u32 *)(uintptr_t)*((uint64_t *)(uintptr_t)cmdbuf->buf);
1450N/A
1450N/A cmdbuf->buf += sizeof (uint64_t);
1450N/A cmdbuf->bufsz -= sizeof (uint64_t);
1450N/A
1450N/A for (i = 0; i < header.scratch.n_bufs; i++) {
1450N/A buf_idx = *(u32 *)(uintptr_t)cmdbuf->buf;
1450N/A buf_idx *= 2; /* 8 bytes per buf */
1450N/A
1450N/A if (DRM_COPY_TO_USER(ref_age_base + buf_idx,
1450N/A &dev_priv->scratch_ages[header.scratch.reg],
1450N/A sizeof (u32))) {
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A if (DRM_COPY_FROM_USER(&h_pending,
1450N/A ref_age_base + buf_idx + 1, sizeof (u32))) {
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A if (h_pending == 0) {
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A h_pending--;
1450N/A
1450N/A if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1,
1450N/A &h_pending, sizeof (u32))) {
1450N/A return (EINVAL);
1450N/A }
1450N/A
1450N/A cmdbuf->buf += sizeof (buf_idx);
1450N/A cmdbuf->bufsz -= sizeof (buf_idx);
1450N/A }
1450N/A
1450N/A BEGIN_RING(2);
1450N/A OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
1450N/A OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
1450N/A ADVANCE_RING();
1450N/A
1450N/A return (0);
1450N/A}
1450N/A
1450N/A/*
1450N/A * Parses and validates a user-supplied command buffer and emits appropriate
1450N/A * commands on the DMA ring buffer.
1450N/A * Called by the ioctl handler function radeon_cp_cmdbuf.
1450N/A */
1450N/A/*ARGSUSED*/
1450N/Aint
1450N/Ar300_do_cp_cmdbuf(drm_device_t *dev,
1450N/A drm_file_t *fpriv, drm_radeon_kcmd_buffer_t *cmdbuf)
1450N/A{
1450N/A drm_radeon_private_t *dev_priv = dev->dev_private;
1450N/A drm_device_dma_t *dma = dev->dma;
1450N/A drm_buf_t *buf = NULL;
1450N/A int emit_dispatch_age = 0;
1450N/A int ret = 0;
1450N/A
1450N/A DRM_DEBUG("\n");
1450N/A
1450N/A /*
1450N/A * See the comment above r300_emit_begin3d for why this call
1450N/A * must be here, and what the cleanup gotos are for.
1450N/A */
1450N/A r300_pacify(dev_priv);
1450N/A
1450N/A if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1450N/A ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1450N/A if (ret)
1450N/A goto cleanup;
1450N/A }
1450N/A
1450N/A while (cmdbuf->bufsz >= sizeof (drm_r300_cmd_header_t)) {
1450N/A int idx;
1450N/A drm_r300_cmd_header_t header;
1450N/A
1450N/A header.u = *(unsigned int *)(uintptr_t)cmdbuf->buf;
1450N/A
1450N/A cmdbuf->buf += sizeof (header);
1450N/A cmdbuf->bufsz -= sizeof (header);
1450N/A
1450N/A switch (header.header.cmd_type) {
1450N/A case R300_CMD_PACKET0:
1450N/A DRM_DEBUG("R300_CMD_PACKET0\n");
1450N/A ret = r300_emit_packet0(dev_priv, cmdbuf, header);
1450N/A if (ret) {
1450N/A DRM_ERROR("r300_emit_packet0 failed\n");
1450N/A goto cleanup;
1450N/A }
1450N/A break;
1450N/A
1450N/A case R300_CMD_VPU:
1450N/A DRM_DEBUG("R300_CMD_VPU\n");
1450N/A ret = r300_emit_vpu(dev_priv, cmdbuf, header);
1450N/A if (ret) {
1450N/A DRM_ERROR("r300_emit_vpu failed\n");
1450N/A goto cleanup;
1450N/A }
1450N/A break;
1450N/A
1450N/A case R300_CMD_PACKET3:
1450N/A DRM_DEBUG("R300_CMD_PACKET3\n");
1450N/A ret = r300_emit_packet3(dev_priv, cmdbuf, header);
1450N/A if (ret) {
1450N/A DRM_ERROR("r300_emit_packet3 failed\n");
1450N/A goto cleanup;
1450N/A }
1450N/A break;
1450N/A
1450N/A case R300_CMD_END3D:
1450N/A DRM_DEBUG("R300_CMD_END3D\n");
1450N/A /*
1450N/A * TODO:
1450N/A * Ideally userspace driver should not need to issue
1450N/A * this call, i.e. the drm driver should issue it
1450N/A * automatically and prevent lockups. In practice, we
1450N/A * do not understand why this call is needed and what
1450N/A * it does (except for some vague guesses that it has
1450N/A * to do with cache coherence) and so the user space
1450N/A * driver does it.
1450N/A *
1450N/A * Once we are sure which uses prevent lockups the code
1450N/A * could be moved into the kernel and the userspace
1450N/A * driver will not need to use this command.
1450N/A *
1450N/A * Note that issuing this command does not hurt anything
1450N/A * except, possibly, performance
1450N/A */
1450N/A r300_pacify(dev_priv);
1450N/A break;
1450N/A
1450N/A case R300_CMD_CP_DELAY:
1450N/A /* simple enough, we can do it here */
1450N/A DRM_DEBUG("R300_CMD_CP_DELAY\n");
1450N/A {
1450N/A int i;
1450N/A RING_LOCALS;
1450N/A
1450N/A BEGIN_RING(header.delay.count);
1450N/A for (i = 0; i < header.delay.count; i++)
1450N/A OUT_RING(RADEON_CP_PACKET2);
1450N/A ADVANCE_RING();
1450N/A }
1450N/A break;
1450N/A
1450N/A case R300_CMD_DMA_DISCARD:
1450N/A DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1450N/A idx = header.dma.buf_idx;
1450N/A if (idx < 0 || idx >= dma->buf_count) {
1450N/A DRM_ERROR("buffer index %d (of %d max)\n",
1450N/A idx, dma->buf_count - 1);
1450N/A ret = EINVAL;
1450N/A goto cleanup;
1450N/A }
1450N/A
1450N/A buf = dma->buflist[idx];
1450N/A if (buf->filp != fpriv || buf->pending) {
1450N/A DRM_ERROR("bad buffer %p %p %d\n",
1450N/A (void *)buf->filp, (void *)fpriv,
1450N/A buf->pending);
1450N/A ret = EINVAL;
1450N/A goto cleanup;
1450N/A }
1450N/A
1450N/A emit_dispatch_age = 1;
1450N/A r300_discard_buffer(dev, buf);
1450N/A break;
1450N/A
1450N/A case R300_CMD_WAIT:
1450N/A /* simple enough, we can do it here */
1450N/A DRM_DEBUG("R300_CMD_WAIT\n");
1450N/A if (header.wait.flags == 0)
1450N/A break; /* nothing to do */
1450N/A
1450N/A {
1450N/A RING_LOCALS;
1450N/A
1450N/A BEGIN_RING(2);
1450N/A OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
1450N/A OUT_RING((header.wait.flags & 0xf) << 14);
1450N/A ADVANCE_RING();
1450N/A }
1450N/A break;
1450N/A
1450N/A case R300_CMD_SCRATCH:
1450N/A DRM_DEBUG("R300_CMD_SCRATCH\n");
1450N/A ret = r300_scratch(dev_priv, cmdbuf, header);
1450N/A if (ret) {
1450N/A DRM_ERROR("r300_scratch failed\n");
1450N/A goto cleanup;
1450N/A }
1450N/A break;
1450N/A
1450N/A default:
1450N/A DRM_ERROR("bad cmd_type %i at %p\n",
1450N/A header.header.cmd_type,
1450N/A (void *)(cmdbuf->buf - sizeof (header)));
1450N/A ret = EINVAL;
1450N/A goto cleanup;
1450N/A }
1450N/A }
1450N/A
1450N/A DRM_DEBUG("END\n");
1450N/A
1450N/Acleanup:
1450N/A r300_pacify(dev_priv);
1450N/A
1450N/A /*
1450N/A * We emit the vertex buffer age here, outside the pacifier "brackets"
1450N/A * for two reasons:
1450N/A * (1) This may coalesce multiple age emissions into a single one and
1450N/A * (2) more importantly, some chips lock up hard when scratch registers
1450N/A * are written inside the pacifier bracket.
1450N/A */
1450N/A if (emit_dispatch_age) {
1450N/A RING_LOCALS;
1450N/A
1450N/A /* Emit the vertex buffer age */
1450N/A BEGIN_RING(2);
1450N/A RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
1450N/A ADVANCE_RING();
1450N/A }
1450N/A
1450N/A COMMIT_RING();
1450N/A
1450N/A return (ret);
1450N/A}