/*
* Copyright (c) 2009, 2013, Intel Corporation.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#include <sys/x86_archext.h>
#include <sys/vfs_opreg.h>
#include "drmP.h"
#include "drm.h"
#include "drm_mm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "intel_drv.h"
unsigned alignment,
bool map_and_fenceable,
bool nonblocking);
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_i915_gem_object *obj);
struct drm_i915_fence_reg *fence,
bool enable);
{
if (obj->tiling_mode)
/* As we do not have an associated fence register, we will force
* a tiling change if we ever need to acquire one.
*/
obj->fence_dirty = false;
}
/* some bookkeeping */
{
}
{
}
static int
{
if (EXIT_COND)
return 0;
/*
* Only wait 10 seconds for the gpu reset to complete to avoid hanging
* userspace. If it takes that long something really bad is going on and
* we should simply try to bail out and fail as gracefully as possible.
*/
DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
return -EIO;
}
return 0;
}
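
/*
 * Illustrative sketch only, not part of this driver: the bounded-wait pattern
 * the comment above describes. Everything here (the helper name, the callback
 * interface, the 10ms poll interval) is hypothetical; the real wait also has
 * to handle signals and the reset-completion notification.
 */
static int
example_bounded_wait(bool (*done)(void *), void (*sleep_ms)(unsigned int), void *arg)
{
	unsigned int waited_ms;

	for (waited_ms = 0; waited_ms < 10 * 1000; waited_ms += 10) {
		if (done(arg))
			return 0;
		sleep_ms(10);
	}

	/* Bail out with an error rather than hang userspace forever. */
	return -EIO;
}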
int
{
int ret;
if (ret)
return ret;
/* fix me mutex_lock_interruptible */
return 0;
}
static inline bool
{
}
int
/* LINTED */
{
return -ENODEV;
return -EINVAL;
/* GEM with user mode setting was never supported on ilk and later. */
return -ENODEV;
return 0;
}
int
/* LINTED */
{
pinned = 0;
return 0;
}
{
return NULL;
}
{
}
static int
struct drm_device *dev,
{
int ret;
if (size == 0)
return -EINVAL;
/* Allocate the new object */
return -ENOMEM;
if (ret) {
return ret;
}
/* drop reference from allocate - handle holds it now */
return 0;
}
int
struct drm_device *dev,
struct drm_mode_create_dumb *args)
{
}
struct drm_device *dev,
{
}
/**
* Creates a new mm object and returns a handle to it.
*/
int
/* LINTED */
{
}
static inline void
int gpu_offset,
int cpu_offset,
int length,
int is_read)
{
int ret;
/* Use the unswizzled path if this page isn't affected. */
if (is_read)
else
if (ret)
return;
}
/* Copy the data, XORing A6 with A17 (1). The user already knows he's
* XORing with the other bits (A9 for Y, A9 and A10 for X)
*/
while (length > 0) {
if (is_read) {
} else {
}
length -= this_length;
}
if (ret)
}
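
/*
 * Illustrative sketch only, not part of this driver: what the bit-6/bit-17
 * swizzled copy described above can look like. On parts with bit-17
 * swizzling, data within an affected page lives at (offset ^ 64), i.e.
 * address bit 6 is flipped, so the copy proceeds one 64-byte cacheline at a
 * time. The helper name is hypothetical and the copy is assumed to stay
 * within a single page.
 */
static void
example_copy_from_swizzled(char *dst, const char *swizzled_src,
    int src_offset, int length)
{
	while (length > 0) {
		/* Stop at the next 64-byte cacheline boundary. */
		int cacheline_end = (src_offset + 64) & ~63;
		int this_length = cacheline_end - src_offset;

		if (this_length > length)
			this_length = length;

		/* Flip bit 6 of the source offset to undo the swizzle. */
		(void) memcpy(dst, swizzled_src + (src_offset ^ 64), this_length);

		dst += this_length;
		src_offset += this_length;
		length -= this_length;
	}
}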
int
/* LINTED */
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pread *args,
{
int ret = 0;
int do_bit17_swizzling;
int needs_clflush = 0;
/* If we're not in the cpu read domain, set ourselves into the gtt
* read domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will dirty the data
* anyway again before the next pread happens. */
needs_clflush = 1;
if (ret)
return ret;
}
}
if (ret)
return ret;
if (needs_clflush)
if (do_bit17_swizzling) {
while (remain > 0) {
/* Operation in this page
*
* shmem_page_index = page number within shmem file
* shmem_page_offset = offset within page in shmem file
* data_page_index = page number in get_user_pages return
* data_page_offset = offset within data_page_index page.
* page_length = bytes to copy for this page
*/
1);
remain -= page_length;
data_ptr += page_length;
offset += page_length;
}
} else {
if (ret)
}
return ret;
}
/**
* Reads data from the object referenced by handle.
*
* On error, the contents of *data are undefined.
*/
int
/* LINTED */
{
int ret = 0;
return 0;
if (ret)
return ret;
goto unlock;
}
/* Bounds check source. */
goto out;
}
/* prime objects have no backing filp to GEM pread/pwrite
* pages from.
*/
out:
return ret;
}
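
/*
 * Illustrative sketch only, not part of this driver: an overflow-safe form of
 * the "bounds check source" test used above. Comparing offset and size
 * separately against the remaining space avoids the wrap that a naive
 * "offset + size > obj_size" check suffers from. The helper name is
 * hypothetical.
 */
static int
example_range_ok(uint64_t obj_size, uint64_t offset, uint64_t size)
{
	if (offset > obj_size)
		return 0;
	if (size > obj_size - offset)
		return 0;
	return 1;
}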
static int
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
/* LINTED */
{
int ret = 0;
if (ret)
goto out;
if (ret)
goto out_unpin;
if (ret)
goto out_unpin;
if (ret) {
return ret;
}
out:
return ret;
}
int
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
/* LINTED */
{
int ret = 0;
int needs_clflush_after = 0;
int needs_clflush_before = 0;
int do_bit17_swizzling;
/* Pin the user pages containing the data. We can't fault while
* holding the struct mutex, and all of the pwrite implementations
* want to hold it while dereferencing the user data.
*/
/* If we're not in the cpu write domain, set ourselves into the gtt
* write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */
needs_clflush_after = 1;
if (ret)
return ret;
}
}
/* The same trick applies to invalidating partially written cachelines before
* writing. */
needs_clflush_before = 1;
if (ret)
return ret;
if (needs_clflush_before)
if (do_bit17_swizzling) {
while (remain > 0) {
/* Operation in this page
*
* shmem_page_index = page number within shmem file
* shmem_page_offset = offset within page in shmem file
* data_page_index = page number in get_user_pages return
* data_page_offset = offset within data_page_index page.
* page_length = bytes to copy for this page
*/
0);
remain -= page_length;
data_ptr += page_length;
offset += page_length;
}
} else {
if (ret)
}
if (needs_clflush_after)
return ret;
}
/**
* Writes data to the object referenced by handle.
*
* On error, the contents of the buffer that were to be modified are undefined.
*/
int
/* LINTED */
{
int ret;
return 0;
if (ret)
return ret;
goto unlock;
}
/* Bounds check destination. */
goto out;
}
/* prime objects have no backing filp to GEM pread/pwrite
* pages from.
*/
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
* pread/pwrite currently are reading and writing from the CPU
* perspective, requiring manual detiling by the client.
*/
goto out;
}
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case. */
/* Flushing cursor object */
}
out:
return ret;
}
int
bool interruptible)
{
if (i915_reset_in_progress(error)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
* -EIO unconditionally for these. */
if (!interruptible)
return -EIO;
/* Recovery complete, but the reset failed ... */
if (i915_terminally_wedged(error))
return -EIO;
return -EAGAIN;
}
return 0;
}
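
/*
 * Illustrative sketch only, not part of this driver: the errno policy the
 * function above implements. A reset in progress becomes -EAGAIN for
 * interruptible callers (they can retry), -EIO for non-interruptible ones,
 * and a terminally wedged GPU is always -EIO. The helper name is
 * hypothetical.
 */
static int
example_reset_errno(bool reset_in_progress, bool terminally_wedged,
    bool interruptible)
{
	if (!reset_in_progress)
		return 0;
	if (!interruptible || terminally_wedged)
		return -EIO;
	return -EAGAIN;
}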
/*
* Compare seqno against outstanding lazy request. Emit a request if they are
* equal.
*/
static int
{
int ret;
ret = 0;
return ret;
}
/**
* __wait_seqno - wait until execution of seqno has finished
* @ring: the ring expected to report seqno
* @seqno: target sequence number to wait for
* @reset_counter: reset sequence associated with the given seqno
* @interruptible: do an interruptible wait (normally yes)
* @timeout: in - how long to wait (NULL forever); out - how much time remaining
*
* Note: It is of utmost importance that the passed in seqno and reset_counter
* values have been read by the caller in an smp safe manner. Where read-side
* locks are involved, it is sufficient to read the reset_counter before
* unlocking the lock that protects the seqno. For lockless tricks, the
* reset_counter _must_ be read before, and an appropriate smp_rmb must be
* inserted.
*
* Returns 0 if the seqno was found within the allotted time. Else returns the
* errno with the remaining time filled in the timeout argument.
*/
unsigned reset_counter,
{
bool wait_forever = false;
return 0;
if (wait_time == 0) {
}
return -ENODEV;
#define EXIT_COND \
do {
/* busy check is faster than cv wait on gen6+ */
/*
* Frequently reading the CS register may cause my GEN7 platform to hang,
* but it's crucial for the missed-IRQ issue.
* So the first wait busy-checks the seqno; the second wait forces correct
* ordering between the irq and seqno writes, then checks again.
*/
2500)) {
500)) {
}
}
} else {
}
/* We need to check whether any gpu reset happened in between
* the caller grabbing the seqno and now ... */
/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
* gone. */
if (end)
} while (end == 0 && wait_forever);
if (ret) {
}
DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
}
return (ret);
}
/**
* Waits for a sequence number to be signaled, and cleans up the
* request and object lists appropriately for that event.
*/
int
{
int ret;
if (ret)
return ret;
if (ret)
return ret;
}
static int
struct intel_ring_buffer *ring)
{
/* Manually manage the write flush as we may have not yet
* retired the buffer.
*
* Note that the last_write_seqno is always the earlier of
* the two (read vs. write) seqnos, so once we have successfully waited,
* we know we have passed the last write.
*/
obj->last_write_seqno = 0;
return 0;
}
/**
* Ensures that all rendering to the object has completed and the object is
* safe to unbind from the GTT or access from the CPU.
*/
static int
bool readonly)
{
int ret;
if (seqno == 0)
return 0;
if (ret)
return ret;
}
/* A nonblocking variant of the above wait. This is a highly dangerous routine
* as the object state may change during this call.
*/
static int
bool readonly)
{
unsigned reset_counter;
int ret;
if (seqno == 0)
return 0;
if (ret)
return ret;
if (ret)
return ret;
if (ret)
return ret;
}
/**
* Called when user space prepares to use an object with the CPU, either
* through the mmap ioctl's mapping or a GTT mapping.
*/
int
/* LINTED */
{
int ret;
/* Only handle setting domains to types used by the CPU. */
if (write_domain & I915_GEM_GPU_DOMAINS)
return -EINVAL;
if (read_domains & I915_GEM_GPU_DOMAINS)
return -EINVAL;
/* Having something in the write domain implies it's in the read
* domain, and only that read domain. Enforce that in the request.
*/
return -EINVAL;
if (ret)
return ret;
goto unlock;
}
/* Try to flush the object off the GPU without holding the lock.
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
if (ret)
goto unref;
if (read_domains & I915_GEM_DOMAIN_GTT) {
/* Silently promote "you're not bound, there was nothing to do"
* to success, since the client was just asking us to
* make sure everything was done.
*/
ret = 0;
} else {
}
return ret;
}
/**
* Called when user space has done writes to this buffer
*/
int
/* LINTED */
{
int ret = 0;
if (ret)
return ret;
goto unlock;
}
/* Pinned buffers may be scanout, so flush the cache */
return ret;
}
/**
* Maps the contents of an object, returning the address it is mapped
* into.
*
* While the mapping holds a reference on the contents of the object, it doesn't
* imply a ref on the object itself.
*/
int
/* LINTED */
{
int ret;
return -ENODEV;
return -EBADF;
/* prime objects have no backing filp to GEM mmap
* pages from.
*/
return -E2BIG;
}
if (ret)
return ret;
return 0;
}
void
{
int ret = 0;
return;
/* Now bind it into the GTT if needed */
/* Access to snoopable pages through the GTT is incoherent. */
goto unlock;
}
if (ret)
goto unlock;
if (ret)
goto unpin;
if (ret)
goto unpin;
obj_priv->fault_mappable = true;
/* Finally, remap it using the new GTT offset */
}
/**
* i915_gem_create_mmap_offset - create a fake mmap offset for an object
* @obj: obj in question
*
* GEM memory mapping works by handing back to userspace a fake mmap offset
* it can use in a subsequent mmap(2) call. The DRM core code then looks
* up the object based on the offset and sets up the various memory mapping
* structures.
*
* This routine allocates and attaches a fake offset for @obj.
*/
static int
{
int ret;
if (ret) {
DRM_ERROR("failed to alloc kernel memory");
return ret;
}
}
/* user_token is the fake offset
* created in drm_map_handle at allocation time
*/
return 0;
}
/**
* i915_gem_release_mmap - remove physical page mappings
* @obj: obj in question
*
* Preserve the reservation of the mmapping with the DRM core code, but
* relinquish ownership of the pages back to the system.
*
* It is vital that we remove the page mapping if we have mapped a tiled
* object through the GTT and then lose the fence register due to
* resource pressure. Similarly if the object has been moved out of the
* aperture, then pages mapped into userspace must be revoked. Removing the
* mapping will then trigger a page fault on the next user access, allowing
* fixup by i915_gem_fault().
*/
void
{
}
}
}
}
static void
{
obj->mmap_offset = 0;
}
{
return size;
/* Previous chips need a power-of-two fence region when tiling */
else
gtt_size <<= 1;
return gtt_size;
}
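
/*
 * Illustrative sketch only, not part of this driver: rounding an object size
 * up to the next power of two, as the comment above requires for pre-i965
 * fence regions. The helper name and the 1MB starting granularity are
 * assumptions for the example (the real minimum differs by generation).
 */
static uint32_t
example_pot_fence_size(uint32_t size)
{
	uint32_t fence_size = 1024 * 1024;

	while (fence_size < size)
		fence_size <<= 1;

	return fence_size;
}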
/**
* i915_gem_get_gtt_alignment - return required GTT alignment for an object
* @obj: object to check
*
* Return the required GTT alignment for an object, taking into account
* potential fence register mapping if needed.
*/
int tiling_mode, bool fenced)
{
/*
* Minimum alignment is 4k (GTT page size), but might be greater
* if a fence register is needed for the object.
*/
return 4096;
/*
* Previous chips need to be aligned to the size of the smallest
* fence register that can contain the object.
*/
}
int
struct drm_device *dev,
{
int ret;
if (ret)
return ret;
goto unlock;
}
goto out;
}
if (!obj->mmap_offset) {
if (ret)
goto out;
}
out:
return ret;
}
/**
* i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
* @dev: DRM device
* @data: GTT mapping ioctl data
* @file_priv: GEM object info
*
* Simply returns the fake offset to userspace so it can mmap it.
* The mmap call will end up in drm_gem_mmap(), which will set things
* up so we can get faults in the handler above.
*
* The fault handler will take care of binding the object into the GTT
* (since it may have been evicted to make room for something), allocating
* a fence register, and mapping the appropriate aperture address into
* userspace.
*/
int
/* LINTED */
{
}
static void
{
int ret;
if (ret) {
/* In the event of a disaster, abandon all caches and
* hope for the best.
*/
}
}
static int
{
return 0;
if (obj->pages_pin_count)
return -EBUSY;
return 0;
}
static int
{
long i;
return -ENOMEM;
}
}
return 0;
}
/* Ensure that the associated pages are gathered from the backing storage
* and pinned into our object. i915_gem_object_get_pages() may be called
* multiple times before they are released by a single call to
* i915_gem_object_put_pages() - once the pages are no longer referenced
* either as a result of memory pressure (reaping pages under the shrinker)
* or as the object is itself released.
*/
int
{
int ret;
return 0;
if (ret)
return ret;
return 0;
}
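
/*
 * Illustrative sketch only, not part of this driver: the pin-count discipline
 * described in the comment above. Backing pages may be "gotten" repeatedly,
 * but a release attempt must fail while anything still holds a pin. The
 * structure and names here are hypothetical.
 */
struct example_backing {
	void	*pages;			/* backing pages, NULL until first get */
	int	pages_pin_count;	/* outstanding pins */
};

static int
example_pages_put(struct example_backing *b)
{
	if (b->pages == NULL)
		return 0;
	if (b->pages_pin_count > 0)
		return -EBUSY;		/* still in use, refuse to release */

	/* ... release b->pages to the backing store here ... */
	b->pages = NULL;
	return 0;
}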
void
struct intel_ring_buffer *ring)
{
/* Keep the seqno relative to the current ring */
}
/* Add a reference if we're newly entering the active list. */
}
/* Move from whatever list we were on to the tail of execution. */
if (obj->fenced_gpu_access) {
/* Bump MRU to take account of the delayed flush */
}
}
}
static void
{
obj->last_read_seqno = 0;
obj->last_write_seqno = 0;
obj->last_fenced_seqno = 0;
obj->fenced_gpu_access = false;
}
static int
{
int ret, i, j;
/* Carefully retire all requests without writing to the rings */
if (ret)
return ret;
}
/* Finally reset hw state */
ring->sync_seqno[j] = 0;
}
return 0;
}
{
int ret;
if (seqno == 0)
return -EINVAL;
/* The HWS page needs to be set to a value less than what we
* will inject into the ring
*/
if (ret)
return ret;
/* Carefully set the last_seqno value so that wrap
* detection still works
*/
if (dev_priv->last_seqno == 0)
dev_priv->last_seqno--;
return 0;
}
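
/*
 * Illustrative sketch only, not part of this driver: the wraparound-safe
 * comparison that the "wrap detection" above relies on. Interpreting the
 * difference as a signed 32-bit value keeps the ordering correct across the
 * 0xffffffff -> 0 wrap, provided the two seqnos are within 2^31 of each
 * other. The helper name is hypothetical.
 */
static bool
example_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return ((int32_t)(seq1 - seq2) >= 0);
}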
int
{
/* reserve 0 for non-seqno */
if (dev_priv->next_seqno == 0) {
if (ret)
return ret;
}
return 0;
}
struct drm_i915_gem_object *obj,
{
int was_empty;
int ret;
/*
* Emit any outstanding flushes - execbuf can fail to emit the flush
* after having emitted the batchbuffer command. Hence we need to fix
* things up similar to emitting the lazy request. The difference here
* is that the flush _must_ happen before the next request, no matter
* what.
*/
if (ret)
return ret;
return -ENOMEM;
/* Record the position of the start of the request so that
* should we detect the updated seqno part-way through the
* GPU processing the request, we never over-estimate the
* position of the head.
*/
if (ret) {
return ret;
}
/* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
* request is retired will the batch_obj be moved onto the
* inactive_list and lose its active reference. Hence we do not need
* to explicitly hold another reference here.
*/
if (file) {
}
}
ring->outstanding_lazy_request = 0;
if (i915_enable_hangcheck) {
}
if (was_empty) {
/* delay by HZ and then run the work directly (not inserted into a Linux-style workqueue) */
DRM_DEBUG("i915_gem: schedule_delayed_work");
}
}
if (out_seqno)
return 0;
}
static inline void
{
if (!file_priv)
return;
}
}
{
return true;
return false;
}
const u32 request_start,
const u32 request_end)
{
if (request_start < request_end) {
return true;
} else if (request_start > request_end) {
return true;
}
return false;
}
{
/* There is a possibility that the unmasked head address,
* pointing inside the ring, matches the batch_obj address range.
* However, this is extremely unlikely.
*/
*inside = true;
return true;
}
}
*inside = false;
return true;
}
return false;
}
struct drm_i915_gem_request *request,
{
/* Innocent until proven guilty */
guilty = false;
DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n",
acthd);
guilty = true;
}
/* If contexts are disabled or this is the default context, use
* file_priv->reset_state
*/
if (hs) {
if (guilty)
hs->batch_active++;
else
hs->batch_pending++;
}
}
{
}
struct intel_ring_buffer *ring)
{
struct drm_i915_gem_request,
list);
}
struct drm_i915_gem_object,
}
}
{
int i;
for (i = 0; i < dev_priv->num_fence_regs; i++) {
/*
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
} else {
}
}
}
{
int i;
/* Move everything out of the GPU domains to ensure we do any
* necessary invalidation upon reuse.
*/
{
}
}
/**
* This function clears the request list as sequence numbers are passed.
*/
void
{
return;
struct drm_i915_gem_request,
list);
break;
/* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
* of tail of the request to update the last known position
* of the GPU head.
*/
}
/* Move any buffers on the active list that are no longer referenced
* by the ringbuffer to the inactive list.
*/
struct drm_i915_gem_object,
break;
}
if (ring->trace_irq_seqno &&
ring->trace_irq_seqno = 0;
}
}
void
{
int i;
}
static void
{
bool idle;
int i;
/* Come back later if the device is busy... */
return;
}
/* Send a periodic flush down the ring so we don't hold onto GEM
* objects indefinitely.
*/
idle = true;
if (ring->gpu_caches_dirty)
}
{
DRM_DEBUG("i915_gem: schedule_delayed_work");
}
if (idle)
}
void
{
}
/**
* Ensures that an object will eventually get non-busy by flushing any required
* write domains, emitting any outstanding lazy request and retiring all
* completed requests.
*/
static int
{
int ret;
if (ret)
return ret;
}
return 0;
}
/**
* i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
* @DRM_IOCTL_ARGS: standard ioctl arguments
*
* Returns 0 if successful, else an error is returned with the remaining time in
* the timeout parameter.
* -ETIME: object is still busy after timeout
* -ERESTARTSYS: signal interrupted the wait
* -ENOENT: object doesn't exist
* Also possible, but rare:
* -EAGAIN: GPU wedged
* -ENOMEM: damn
* -ENODEV: Internal IRQ fail
* -E?: The add request failed
*
* The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
* non-zero timeout parameter the wait ioctl will wait for the given number of
* nanoseconds on an object becoming unbusy. Since the wait itself does so
* without holding struct_mutex the object may become re-busied before this
* function completes. A similar but shorter race condition exists in the busy
* ioctl.
*/
int
{
unsigned reset_counter;
int ret = 0;
if (args->timeout_ns >= 0) {
}
if (ret)
return ret;
return -ENOENT;
}
/* Need to make sure the object gets inactive eventually. */
if (ret)
goto out;
}
if (seqno == 0)
goto out;
/* Do this after OLR check to make sure we make forward progress polling
* on this IOCTL with a 0 timeout (like busy ioctl)
*/
if (!args->timeout_ns) {
goto out;
}
if (timeout) {
}
return ret;
out:
return ret;
}
/**
* i915_gem_object_sync - sync an object to a ring.
*
* @obj: object which may be in use on another ring.
* @to: ring we wish to use the object on. May be NULL.
*
* This code is meant to abstract object synchronization with the GPU.
* Calling with NULL implies synchronizing the object with the CPU
* rather than a particular GPU ring.
*
* Returns 0 if successful, else propagates up the lower layer error.
*/
int
struct intel_ring_buffer *to)
{
return 0;
return i915_gem_object_wait_rendering(obj, false);
return 0;
if (ret)
return ret;
if (!ret)
/* We use last_read_seqno because sync_to()
* might have just caused seqno wrap under
* the radar.
*/
return ret;
}
{
/* Force a pagefault for domain tracking on next user access */
return;
}
/**
* Unbinds an object from the GTT aperture.
*/
int
{
int ret;
return 0;
return -EBUSY;
if (ret)
return ret;
/* Continue on if we fail due to EIO, the GPU is hung so we
* should be safe and we need to cleanup or else we might
* cause memory corruption through use-after-free.
*/
/* release the fence reg _after_ flushing */
if (ret)
return ret;
if (obj->has_global_gtt_mapping)
if (obj->has_aliasing_ppgtt_mapping) {
}
/* Avoid an unnecessary call to unbind on rebind. */
obj->map_and_fenceable = true;
obj->gtt_offset = 0;
return 0;
}
{
int ret, i;
/* Flush everything onto the inactive list. */
if (ret)
return ret;
if (ret)
return ret;
}
return 0;
}
struct drm_i915_gem_object *obj)
{
int fence_reg;
int fence_pitch_shift;
} else {
}
/* To w/a incoherency with non-atomic 64-bit register updates,
* we split the 64-bit update into two 32-bit writes. In order
* for a partial fence not to be evaluated between writes, we
* precede the update with write to turn off the fence register,
* and only enable the fence as the last step.
*
* For extra levels of paranoia, we make sure each step lands
* before applying the next step.
*/
I915_WRITE(fence_reg, 0);
if (obj) {
0xfffff000) << 32;
} else {
}
}
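
/*
 * Illustrative sketch only, not part of this driver: the ordering described
 * in the comment above for updating a 64-bit fence register with two 32-bit
 * writes. example_mmio_write32()/example_mmio_read32() stand in for the real
 * register accessors and are hypothetical; the read-back after each write is
 * what keeps the steps from being reordered.
 */
static void
example_update_fence64(uint32_t reg, uint64_t val,
    void (*example_mmio_write32)(uint32_t, uint32_t),
    uint32_t (*example_mmio_read32)(uint32_t))
{
	/* 1. Turn the fence off so a partial value is never evaluated. */
	example_mmio_write32(reg, 0);
	(void) example_mmio_read32(reg);

	/* 2. Write the high dword while the fence is still disabled. */
	example_mmio_write32(reg + 4, (uint32_t)(val >> 32));
	(void) example_mmio_read32(reg + 4);

	/* 3. Write the low dword, which carries the enable bit, last. */
	example_mmio_write32(reg, (uint32_t)val);
	(void) example_mmio_read32(reg);
}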
struct drm_i915_gem_object *obj)
{
if (obj) {
int pitch_val;
int tile_width;
DRM_ERROR("object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
tile_width = 128;
else
tile_width = 512;
/* Note: pitch better be a power of two tile widths */
} else
val = 0;
if (reg < 8)
else
}
struct drm_i915_gem_object *obj)
{
if (obj) {
DRM_ERROR("object 0x%08x not 512K or pot-size 0x%08x aligned\n",
} else
val = 0;
}
{
}
struct drm_i915_gem_object *obj)
{
/* Ensure that all CPU reads are completed before installing a fence
* and all writes before removing the fence.
*/
DRM_ERROR("bogus fence setup with stride: 0x%x, tiling mode: %i\n",
case 7:
case 6:
case 5:
default: BUG();
}
/* And similarly be paranoid that no direct access to this region
* is reordered to before the fence is installed.
*/
if (i915_gem_object_needs_mb(obj))
}
struct drm_i915_fence_reg *fence)
{
}
struct drm_i915_fence_reg *fence,
bool enable)
{
if (enable) {
} else {
}
obj->fence_dirty = false;
}
static int
{
if (obj->last_fenced_seqno) {
if (ret)
return ret;
obj->last_fenced_seqno = 0;
}
obj->fenced_gpu_access = false;
return 0;
}
int
{
int ret;
if (ret)
return ret;
return 0;
return 0;
}
static struct drm_i915_fence_reg *
{
int i;
/* First try to find a free reg */
return reg;
}
return NULL;
/* None available, try to steal one or wait for a user to finish */
continue;
return reg;
}
return NULL;
}
/**
* i915_gem_object_get_fence_reg - set up a fence reg for an object
* @obj: object to map through a fence reg
*
* When mapping objects through the GTT, userspace wants to be able to write
* to them without having to worry about swizzling if the object is tiled.
* This function walks the fence regs looking for a free one for @obj,
* stealing one if it can't find any.
*
* It then sets up the reg based on the object's properties: address, pitch
* and tiling format.
*
* For an untiled surface, this removes any existing fence.
*/
int
{
int ret;
/* Have we updated the tiling parameters upon the object and so
* will need to serialise the write to the associated fence register?
*/
if (obj->fence_dirty) {
if (ret)
return ret;
}
/* Just update our place in the LRU if our fence is getting reused. */
if (!obj->fence_dirty) {
return 0;
}
} else if (enable) {
return -EDEADLK;
if (ret)
return ret;
}
} else
return 0;
return 0;
}
struct drm_mm_node *gtt_space,
unsigned long cache_level)
{
/* On non-LLC machines we have to be careful when putting differing
* types of snoopable memory together to avoid the prefetcher
* crossing memory domains and dying.
*/
return true;
return true;
return true;
return false;
return false;
return true;
}
{
#if WATCH_GTT
int err = 0;
DRM_ERROR("object found on GTT list with no space reserved\n");
err++;
continue;
}
DRM_ERROR("object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
err++;
continue;
}
if (!i915_gem_valid_gtt_space(dev,
obj->cache_level)) {
DRM_ERROR("invalid GTT space found at [%08lx, %08lx] - color=%x\n",
obj->cache_level);
err++;
continue;
}
}
#endif
}
/**
* Finds free space in the GTT aperture and binds the object there.
*/
static int
unsigned alignment,
bool map_and_fenceable,
bool nonblocking)
{
int ret;
obj->tiling_mode);
obj->tiling_mode, true);
obj->tiling_mode, false);
if (alignment == 0)
return -EINVAL;
}
/* If the object is bigger than the entire aperture, reject it early
* before evicting everything in a vain attempt to find space.
*/
DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
gtt_max);
return -E2BIG;
}
if (ret)
return ret;
return -ENOMEM;
}
if (ret) {
if (ret == 0)
goto search_free;
return ret;
}
if ((!i915_gem_valid_gtt_space(dev,
node,
obj->cache_level))) {
return -EINVAL;
}
if (ret) {
return ret;
}
mappable =
return 0;
}
void
{
/* If we don't have a page list set up, then we're not pinned
* to GPU, and we can ignore the cache flush because it'll happen
* again at bind time.
*/
return;
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
*/
return;
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
* the caches are only snooped when the render cache is
* flushed/invalidated. As we always have to emit invalidations
* and flushes when moving into and out of the RENDER domain, correct
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
return;
}
/** Flushes the GTT write domain for the object if it's dirty. */
static void
{
return;
/* No actual flushing is required for the GTT write domain. Writes
* to it immediately go to main memory as far as we know, so there's
* no chipset flush. It also doesn't land in render cache.
*
* However, we do have to enforce the order so that all writes through
* the GTT land before any writes to the device, such as updates to
* the GATT itself.
*/
}
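
/*
 * Illustrative sketch only, not part of this driver: the ordering requirement
 * the comment above describes. Before the device is told to look at data
 * written through the GTT, a store barrier (for example membar_producer(9F)
 * on Solaris, or whatever write barrier the port provides) must make those
 * writes globally visible. Both callbacks here are hypothetical.
 */
static void
example_publish_gtt_writes(void (*example_store_barrier)(void),
    void (*example_notify_device)(void))
{
	/* All CPU writes through the GTT must land first... */
	example_store_barrier();

	/* ...before any register write that makes the device read them. */
	example_notify_device();
}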
/** Flushes the CPU write domain for the object if it's dirty. */
static void
{
return;
}
/**
* Moves a single object to the GTT read, and possibly write domain.
*
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
int
{
int ret;
/* Not valid to be called on unbound objects. */
return -EINVAL;
return 0;
if (ret)
return ret;
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
* GTT domain upon first access.
*/
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
/* GPU reset can handle this error */
// BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
if (write) {
}
/* And bump the LRU for this access */
return 0;
}
enum i915_cache_level cache_level)
{
int ret;
return 0;
DRM_DEBUG("can not change the cache level of pinned objects\n");
return -EBUSY;
}
if (ret)
return ret;
}
if (ret)
return ret;
/* Before SandyBridge, you could not use tiling or fence
* registers with snooped memory, so relinquish any fences
* currently pointing to our region in the aperture.
*/
if (ret)
return ret;
}
if (obj->has_global_gtt_mapping)
obj, cache_level);
}
if (cache_level == I915_CACHE_NONE) {
/* If we're coming from LLC cached, then we haven't
* actually been tracking whether the data is in the
* CPU cache or not, since we only allow one bit set
* in obj->write_domain and have been skipping the clflushes.
* Just set it to the CPU cache for now.
*/
}
return 0;
}
{
int ret;
if (ret)
return ret;
goto unlock;
}
return ret;
}
{
int ret;
case I915_CACHING_NONE:
break;
case I915_CACHING_CACHED:
break;
default:
return -EINVAL;
}
if (ret)
return ret;
goto unlock;
}
return ret;
}
/*
* Prepare buffer for display plane (scanout, cursors, etc).
* Can be called from an uninterruptible phase (modesetting) and allows
* any flushes to be pipelined (for pageflips).
*/
int
struct intel_ring_buffer *pipelined)
{
/* LINTED */
int ret;
if (ret)
return ret;
}
/* The display engine is not coherent with the LLC cache on gen6. As
* a result, we make sure that the pinning that is about to occur is
* done with uncached PTEs. This is lowest common denominator for all
* chipsets.
*
* However for gen6+, we could do better by using the GFDT bit instead
* of uncaching, which would allow us to flush all the LLC-cached data
* with that bit in the PTE to main memory with just one PIPE_CONTROL.
*/
if (ret)
return ret;
/* As the user may map the buffer once pinned in the display plane
* (e.g. libkms for the bootup splash), we have to ensure that we
* always use map_and_fenceable for all scanout buffers.
*/
if (ret)
return ret;
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
return 0;
}
int
{
int ret;
return 0;
if (ret)
return ret;
/* Ensure that we invalidate the GPU's caches and TLBs. */
return 0;
}
/**
* Moves a single object to the CPU read, and possibly write domain.
*
* This function returns when the move is complete, including waiting on
* flushes to occur.
*/
int
{
/* LINTED */
int ret;
return 0;
if (ret)
return ret;
/* Flush the CPU cache if it's still invalid. */
}
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
/* If we're writing through the CPU, then the GPU read domains will
* need to be invalidated at next use.
*/
if (write) {
}
return 0;
}
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
* Note that if we were to use the current jiffies each time around the loop,
* we wouldn't escape the function with any frames outstanding if the time to
* render a frame was over 20ms.
*
* This should get us reasonable parallelism between CPU and GPU but also
* relatively low latency when blocking on a particular request to finish.
*/
static int
{
unsigned reset_counter;
int ret;
if (ret)
return ret;
if (ret)
return ret;
list_for_each_entry(request, struct drm_i915_gem_request, &file_priv->mm.request_list, client_list) {
break;
}
if (seqno == 0)
return 0;
if (ret == 0)
return ret;
}
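
/*
 * Illustrative sketch only, not part of this driver: the 20ms throttle window
 * described above. Given per-request emission timestamps in oldest-first
 * order, return the seqno of the newest request that is already more than
 * 20ms old; waiting on it leaves at most the last 20ms of work outstanding.
 * Types and names are hypothetical.
 */
static uint32_t
example_throttle_target(const clock_t *emitted, const uint32_t *seqnos,
    int nreq, clock_t now, clock_t ticks_per_20ms)
{
	uint32_t target = 0;
	int i;

	for (i = 0; i < nreq; i++) {
		if (emitted[i] >= now - ticks_per_20ms)
			break;		/* emitted recently; stop scanning */
		target = seqnos[i];
	}

	return target;			/* 0 means nothing to wait for */
}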
int
bool map_and_fenceable,
bool nonblocking)
{
int ret;
return -EBUSY;
DRM_INFO("bo is already pinned with incorrect alignment:"
" offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
" obj->map_and_fenceable=%d\n",
if (ret)
return ret;
}
}
if (ret)
return ret;
}
return 0;
}
void
{
obj->pin_mappable = false;
}
int
/* LINTED */
{
int ret;
if (ret)
return ret;
goto unlock;
}
DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
goto out;
}
obj->user_pin_count++;
if (ret)
goto out;
}
/* XXX - flush the CPU caches for pinned objects
* as the X server doesn't manage domains yet
*/
out:
return ret;
}
int
/* LINTED */
{
int ret;
if (ret)
return ret;
goto unlock;
}
DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
goto out;
}
obj->user_pin_count--;
if (obj->user_pin_count == 0) {
}
out:
return ret;
}
int
/* LINTED */
{
int ret;
if (ret)
return ret;
goto unlock;
}
/* Count all active objects as busy, even if they are currently not used
* by the gpu. Users of this interface expect objects to eventually
* become non-busy without any further actions, therefore emit any
* necessary flushes here.
*/
}
return ret;
}
int
/* LINTED */
{
}
int
/* LINTED */
{
/* Don't enable buffer catch */
return 0;
}
const struct drm_i915_gem_object_ops *ops)
{
/* Avoid an unnecessary call to unbind on the first bind. */
obj->map_and_fenceable = true;
}
};
{
int gen;
return NULL;
gen = 33;
else
DRM_ERROR("failed to init gem object");
return NULL;
}
/* On Gen6, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
* display scanout are coherent with the CPU in
* accessing this cache. This means in this mode we
* don't need to clflush on the CPU side, and on the
* GPU side we only need to flush internal caches to
* get data visible to the CPU.
*
* However, we maintain the display planes as UC, and so
* need to rebind when first used as such.
*/
} else
return obj;
}
{
DRM_ERROR("i915_gem_init_object is not supported, BUG!");
return 0;
}
{
int ret;
if (ret) {
bool was_interruptible;
}
/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
* before progressing. */
if (obj->pages_pin_count)
obj->pages_pin_count = 0;
if (obj->mmap_offset)
// if (obj->base.import_attach)
// drm_prime_gem_destroy(&obj->base, NULL);
}
int
{
int ret;
return 0;
}
if (ret) {
return ret;
}
/* Under UMS, be paranoid and evict. */
/* Hack! Don't let anybody do execbuf while we don't control the chip.
* We need to replace this with a semaphore, or something.
* And not confound mm.suspended!
*/
/* Cancel the retire work handler, wait for it to finish if running
*/
return 0;
}
{
int i;
if (!HAS_L3_GPU_CACHE(dev))
return;
return;
for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
DRM_DEBUG("0x%x was already programmed to %x\n",
GEN7_L3LOG_BASE + i, remap);
DRM_DEBUG_DRIVER("Clearing remapped register\n");
}
/* Make sure all the writes land before disabling dop clock gating */
}
{
return;
return;
/* LINTED */
else
BUG();
}
static bool
{
return false;
return true;
}
{
int ret;
if (ret)
return ret;
if (ret)
goto cleanup_render_ring;
}
if (intel_enable_blt(dev)) {
if (ret)
goto cleanup_bsd_ring;
}
if (ret)
goto cleanup_blt_ring;
}
if (ret)
goto cleanup_vebox_ring;
return 0;
return ret;
}
int
{
int ret;
if (HAS_PCH_NOP(dev)) {
}
if (ret)
return ret;
/*
* XXX: There was some w/a described somewhere suggesting loading
* contexts before PPGTT.
*/
if (ret) {
DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n");
}
}
return 0;
}
{
int ret;
int size;
if (IS_VALLEYVIEW(dev)) {
/* VLVA0 (potential hack), BIOS isn't actually waking us */
DRM_DEBUG_DRIVER("allow wake ack timed out\n");
}
if (size > 0) {
/* save original fb GTT */
/*
* Some BIOSes fail to initialise the GTT, which will cause DMA faults when
* the IOMMU is enabled. We need to clear the whole GTT.
*/
/* workaround: preallocate the fb buffer and make sure its start address is 0 */
DRM_ERROR("failed to allocate framebuffer");
return (-ENOMEM);
}
/* copy old content to fb buffer */
/* Flush everything out, we'll be doing GTT only from now on */
if (ret) {
return ret;
}
}
if (ret) {
return ret;
}
/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
return 0;
}
void
{
int i;
}
int
/* LINTED */
{
int ret;
return 0;
DRM_ERROR("Reenabling wedged hardware, good luck\n");
}
if (ret != 0) {
return ret;
}
if (ret)
goto cleanup_ringbuffer;
return 0;
return ret;
}
int
/* LINTED */
{
return 0;
(void ) drm_irq_uninstall(dev);
return i915_gem_idle(dev, 0);
}
void
{
int ret;
return;
if (ret)
}
static void
{
}
void
{
int i;
for (i = 0; i < I915_NUM_RINGS; i++)
for (i = 0; i < I915_MAX_NUM_FENCES; i++)
(void *)dev);
/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
}
/* Old X drivers will take 0-2 for front, back, depth buffers */
else
/* Initialize fence registers to zero */
}
/*
* Create a physically contiguous memory object for this object
* e.g. for cursor + overlay regs
*/
{
int ret;
return 0;
if (!phys_obj)
return -ENOMEM;
goto kfree_obj;
}
return 0;
return ret;
}
{
return;
}
}
{
int i;
for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
}
struct drm_i915_gem_object *obj)
{
int i, ret;
int page_count;
return;
if (ret)
goto out;
}
for (i = 0; i < page_count; i++) {
}
out:
}
int
struct drm_i915_gem_object *obj,
int id,
int align)
{
int ret = 0;
int page_count;
int i;
if (id > I915_MAX_PHYS_OBJECT)
return -EINVAL;
return 0;
}
/* create a new object */
if (ret) {
goto out;
}
}
/* bind to the object */
if (ret) {
DRM_ERROR("failed to get page list\n");
goto out;
}
}
for (i = 0; i < page_count; i++) {
}
return 0;
out:
return ret;
}
static int
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
/* LINTED */
{
void *obj_addr;
int ret;
if (ret)
return -EFAULT;
return 0;
}
{
/* i915_gpu_idle() generates a warning message, so just ignore the return value */
(void) i915_gpu_idle(dev);
/* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone
* file_priv.
*/
struct drm_i915_gem_request,
}
}