--- src/mesa/drivers/dri/i965/gen6_wm_state.c Mon Mar 26 14:38:15 2012
+++ src/mesa/drivers/dri/i965/gen6_wm_state.c Mon Mar 26 14:41:19 2012
@@ -102,15 +102,14 @@
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
- BEGIN_BATCH(5);
+ BEGIN_BATCH(14);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
- ADVANCE_BATCH();
} else {
- BEGIN_BATCH(5);
+ BEGIN_BATCH(14);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
@@ -123,7 +122,6 @@
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
- ADVANCE_BATCH();
}
dw2 = dw4 = dw5 = dw6 = 0;
@@ -183,7 +181,6 @@
dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) <<
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
- BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
--- src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-28 17:33:56.614254514 +0800
+++ src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-28 17:42:43.199231662 +0800
@@ -728,10 +728,24 @@ static void upload_state_base_address( s
*/
if (intel->gen >= 6) {
- if (intel->gen == 6)
- intel_emit_post_sync_nonzero_flush(intel);
+ if ((intel->gen == 6) && intel->batch.need_workaround_flush) {
+
+ BEGIN_BATCH(18);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_RELOC(intel->batch.workaround_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+ OUT_BATCH(0); /* write data */
+
+ } else
+ BEGIN_BATCH(10);
- BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
/* General state base address: stateless DP read/write requests */
OUT_BATCH(1);
@@ -762,6 +776,8 @@ static void upload_state_base_address( s
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
+ if (intel->batch.need_workaround_flush)
+ intel->batch.need_workaround_flush = false;
} else if (intel->gen == 5) {
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
--- src/mesa/drivers/dri/i965/brw_draw.c 2012-03-30 17:40:30.144996415 +0800
+++ src/mesa/drivers/dri/i965/brw_draw.c 2012-03-30 17:44:54.000562204 +0800
@@ -157,6 +157,12 @@ static void brw_emit_prim(struct brw_con
if (verts_per_instance == 0)
return;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 30*4)
+ intel_batchbuffer_flush(intel);
+ }
+
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
* and missed flushes of the render cache as it heads to other parts of
--- src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-30 17:40:30.201906235 +0800
+++ src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-30 17:44:54.001618123 +0800
@@ -223,6 +223,12 @@ static void emit_depthbuffer(struct brw_
struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
unsigned int len;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 31*4)
+ intel_batchbuffer_flush(intel);
+ }
+
/* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
* non-pipelined state that will need the PIPE_CONTROL workaround.
*/
@@ -456,6 +462,12 @@ static void upload_polygon_stipple(struc
if (!ctx->Polygon.StippleFlag)
return;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 41*4)
+ intel_batchbuffer_flush(intel);
+ }
+
if (intel->gen == 6)
intel_emit_post_sync_nonzero_flush(intel);
@@ -504,6 +516,12 @@ static void upload_polygon_stipple_offse
if (!ctx->Polygon.StippleFlag)
return;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 10*4)
+ intel_batchbuffer_flush(intel);
+ }
+
if (intel->gen == 6)
intel_emit_post_sync_nonzero_flush(intel);
@@ -548,6 +566,12 @@ static void upload_aa_line_parameters(st
if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
return;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 11*4)
+ intel_batchbuffer_flush(intel);
+ }
+
if (intel->gen == 6)
intel_emit_post_sync_nonzero_flush(intel);
@@ -581,6 +605,12 @@ static void upload_line_stipple(struct b
if (!ctx->Line.StippleFlag)
return;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 11*4)
+ intel_batchbuffer_flush(intel);
+ }
+
if (intel->gen == 6)
intel_emit_post_sync_nonzero_flush(intel);
@@ -612,8 +642,62 @@ static void upload_invarient_state( stru
struct intel_context *intel = &brw->intel;
/* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
- if (intel->gen == 6)
- intel_emit_post_sync_nonzero_flush(intel);
+ if (intel->gen == 6) {
+ int i;
+
+ if (intel->batch.need_workaround_flush) {
+
+ BEGIN_BATCH(33);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_RELOC(intel->batch.workaround_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+ OUT_BATCH(0); /* write data */
+ } else
+ BEGIN_BATCH(25);
+ /* Select the 3D pipeline (as opposed to media) */
+ OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
+
+ OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (1));
+ OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+ MS_NUMSAMPLES_1);
+ OUT_BATCH(0); /* positions for 4/8-sample */
+
+ OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
+ OUT_BATCH(1);
+
+ if (intel->gen < 7) {
+ for (i = 0; i < 4; i++) {
+ OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
+ OUT_BATCH(i << SVB_INDEX_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(0xffffffff);
+ }
+ }
+
+ /* 0x61020000 State Instruction Pointer */
+ {
+ OUT_BATCH(CMD_STATE_INSN_POINTER);
+ OUT_BATCH(0);
+ }
+
+
+ {
+ OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
+ (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
+ }
+ ADVANCE_BATCH();
+
+ if (intel->batch.need_workaround_flush)
+ intel->batch.need_workaround_flush = false;
+
+ } else {
{
/* 0x61040000 Pipeline Select */
@@ -693,6 +777,7 @@ static void upload_invarient_state( stru
BRW_BATCH_STRUCT(brw, &vfs);
}
+ }
}
const struct brw_tracked_state brw_invarient_state = {
--- src/mesa/drivers/dri/intel/intel_batchbuffer.c 2012-03-30 17:40:29.342162385 +0800
+++ src/mesa/drivers/dri/intel/intel_batchbuffer.c 2012-03-30 17:43:45.957231205 +0800
@@ -83,6 +83,9 @@ intel_batchbuffer_reset(struct intel_con
intel->maxBatchSize, 4096);
intel->batch.reserved_space = 4096;
+ /* reserve more space for hw workaround */
+ if (intel->gen == 6)
+ intel->batch.reserved_space += 14*4;
intel->batch.state_batch_offset = intel->batch.bo->size;
intel->batch.used = 0;
}
--- src/mesa/drivers/dri/i965/brw_draw_upload.c 2012-04-03 06:33:26.650545317 +0800
+++ src/mesa/drivers/dri/i965/brw_draw_upload.c 2012-04-03 06:44:34.953419019 +0800
@@ -531,8 +531,14 @@ static void brw_emit_vertices(struct brw
struct intel_context *intel = intel_context(ctx);
GLuint i;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < (13 + 4*brw->vb.nr_buffers + 2* brw->vb.nr_enabled)*4)
+ intel_batchbuffer_flush(intel);
+ }
brw_emit_query_begin(brw);
+
/* If the VS doesn't read any inputs (calculating vertex position from
* a state variable for some reason, for example), emit a single pad
* VERTEX_ELEMENT struct and bail.
diff -rupN Mesa-7.11.2.ori/src/mesa/drivers/dri/i915/intel_batchbuffer.c Mesa-7.11.2/src/mesa/drivers/dri/i915/intel_batchbuffer.c
--- src/mesa/drivers/dri/i915/intel_batchbuffer.c 2012-04-05 16:56:14.558005520 +0800
+++ src/mesa/drivers/dri/i915/intel_batchbuffer.c 2012-04-06 01:28:50.914943452 +0800
@@ -427,8 +427,10 @@ intel_batchbuffer_emit_mi_flush(struct i
OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TC_FLUSH |
- PIPE_CONTROL_NO_WRITE);
+ PIPE_CONTROL_NO_WRITE |
+ PIPE_CONTROL_CS_STALL);
OUT_BATCH(0); /* write address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
diff -rupN Mesa-7.11.2.ori/src/mesa/drivers/dri/i965/brw_draw.c Mesa-7.11.2/src/mesa/drivers/dri/i965/brw_draw.c
--- src/mesa/drivers/dri/i965/brw_draw.c 2012-04-05 16:56:14.560673892 +0800
+++ src/mesa/drivers/dri/i965/brw_draw.c 2012-04-05 17:20:05.997773534 +0800
@@ -168,7 +168,7 @@ static void brw_emit_prim(struct brw_con
* and missed flushes of the render cache as it heads to other parts of
* the besides the draw code.
*/
- if (intel->always_flush_cache) {
+ if ((intel->always_flush_cache) || (intel->gen == 6)) {
intel_batchbuffer_emit_mi_flush(intel);
}
@@ -185,7 +185,7 @@ static void brw_emit_prim(struct brw_con
intel->batch.need_workaround_flush = true;
- if (intel->always_flush_cache) {
+ if ((intel->always_flush_cache) || (intel->gen == 6)) {
intel_batchbuffer_emit_mi_flush(intel);
}
}
--- src/mesa/drivers/dri/i965/gen6_vs_state.c 2012-04-05 16:56:14.563966631 +0800
+++ src/mesa/drivers/dri/i965/gen6_vs_state.c 2012-04-05 16:59:20.840833882 +0800
@@ -120,6 +120,12 @@ upload_vs_state(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
+ /* workaround for gen6, reserve enough space for HW workaround */
+ if (intel->gen == 6) {
+ if (intel_batchbuffer_space(intel) < 23*4)
+ intel_batchbuffer_flush(intel);
+ }
+
if (brw->vs.push_const_size == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
--- src/mesa/drivers/dri/i965/intel_batchbuffer.c 2012-04-05 16:56:14.564548246 +0800
+++ src/mesa/drivers/dri/i965/intel_batchbuffer.c 2012-04-06 01:28:50.914943452 +0800
@@ -427,8 +427,10 @@ intel_batchbuffer_emit_mi_flush(struct i
OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TC_FLUSH |
- PIPE_CONTROL_NO_WRITE);
+ PIPE_CONTROL_NO_WRITE |
+ PIPE_CONTROL_CS_STALL);
OUT_BATCH(0); /* write address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();