--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/open-src/lib/mesa/fix-gen6-viewperf-hang.patch Sun May 06 11:15:02 2012 -0700
@@ -0,0 +1,325 @@
+--- src/mesa/drivers/dri/i965/gen6_wm_state.c Mon Mar 26 14:38:15 2012
++++ src/mesa/drivers/dri/i965/gen6_wm_state.c Mon Mar 26 14:41:19 2012
+@@ -102,15 +102,14 @@
+ /* CACHE_NEW_WM_PROG */
+ if (brw->wm.prog_data->nr_params == 0) {
+ /* Disable the push constant buffers. */
+- BEGIN_BATCH(5);
++ BEGIN_BATCH(14);
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+- ADVANCE_BATCH();
+ } else {
+- BEGIN_BATCH(5);
++ BEGIN_BATCH(14);
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
+ GEN6_CONSTANT_BUFFER_0_ENABLE |
+ (5 - 2));
+@@ -123,7 +122,6 @@
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+- ADVANCE_BATCH();
+ }
+
+ dw2 = dw4 = dw5 = dw6 = 0;
+@@ -183,7 +181,6 @@
+ dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) <<
+ GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+
+- BEGIN_BATCH(9);
+ OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
+ OUT_BATCH(brw->wm.prog_offset);
+ OUT_BATCH(dw2);
+--- src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-28 17:33:56.614254514 +0800
++++ src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-28 17:42:43.199231662 +0800
+@@ -728,10 +728,24 @@ static void upload_state_base_address( s
+ */
+
+ if (intel->gen >= 6) {
+- if (intel->gen == 6)
+- intel_emit_post_sync_nonzero_flush(intel);
++ if ((intel->gen == 6) && intel->batch.need_workaround_flush) {
++
++ BEGIN_BATCH(18);
++ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++ OUT_BATCH(PIPE_CONTROL_CS_STALL |
++ PIPE_CONTROL_STALL_AT_SCOREBOARD);
++ OUT_BATCH(0); /* address */
++ OUT_BATCH(0); /* write data */
++
++ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
++ OUT_RELOC(intel->batch.workaround_bo,
++ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
++ OUT_BATCH(0); /* write data */
++
++ } else
++ BEGIN_BATCH(10);
+
+- BEGIN_BATCH(10);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+ /* General state base address: stateless DP read/write requests */
+ OUT_BATCH(1);
+@@ -762,6 +776,8 @@ static void upload_state_base_address( s
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
++ if (intel->batch.need_workaround_flush)
++ intel->batch.need_workaround_flush = false;
+ } else if (intel->gen == 5) {
+ BEGIN_BATCH(8);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+--- src/mesa/drivers/dri/i965/brw_draw.c 2012-03-30 17:40:30.144996415 +0800
++++ src/mesa/drivers/dri/i965/brw_draw.c 2012-03-30 17:44:54.000562204 +0800
+@@ -157,6 +157,12 @@ static void brw_emit_prim(struct brw_con
+ if (verts_per_instance == 0)
+ return;
+
++ /* Gen6 HW workaround: flush the batch first if less than 30*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 30*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+--- src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-30 17:40:30.201906235 +0800
++++ src/mesa/drivers/dri/i965/brw_misc_state.c 2012-03-30 17:44:54.001618123 +0800
+@@ -223,6 +223,12 @@ static void emit_depthbuffer(struct brw_
+ struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
+ unsigned int len;
+
++ /* Gen6 HW workaround: flush the batch first if less than 31*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 31*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
+ * non-pipelined state that will need the PIPE_CONTROL workaround.
+ */
+@@ -456,6 +462,12 @@ static void upload_polygon_stipple(struc
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
++ /* Gen6 HW workaround: flush the batch first if less than 41*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 41*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
+@@ -504,6 +516,12 @@ static void upload_polygon_stipple_offse
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
++ /* Gen6 HW workaround: flush the batch first if less than 10*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 10*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
+@@ -548,6 +566,12 @@ static void upload_aa_line_parameters(st
+ if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
+ return;
+
++ /* Gen6 HW workaround: flush the batch first if less than 11*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 11*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
+@@ -581,6 +605,12 @@ static void upload_line_stipple(struct b
+ if (!ctx->Line.StippleFlag)
+ return;
+
++ /* Gen6 HW workaround: flush the batch first if less than 11*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 11*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
+@@ -612,8 +642,62 @@ static void upload_invarient_state( stru
+ struct intel_context *intel = &brw->intel;
+
+ /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
+- if (intel->gen == 6)
+- intel_emit_post_sync_nonzero_flush(intel);
++ if (intel->gen == 6) {
++ int i;
++
++ if (intel->batch.need_workaround_flush) {
++
++ BEGIN_BATCH(33);
++ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++ OUT_BATCH(PIPE_CONTROL_CS_STALL |
++ PIPE_CONTROL_STALL_AT_SCOREBOARD);
++ OUT_BATCH(0); /* address */
++ OUT_BATCH(0); /* write data */
++
++ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
++ OUT_RELOC(intel->batch.workaround_bo,
++ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
++ OUT_BATCH(0); /* write data */
++ } else
++ BEGIN_BATCH(25);
++ /* Select the 3D pipeline (as opposed to media) */
++ OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
++
++ OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (1));
++ OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
++ MS_NUMSAMPLES_1);
++ OUT_BATCH(0); /* positions for 4/8-sample */
++
++ OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
++ OUT_BATCH(1);
++
++ if (intel->gen < 7) {
++ for (i = 0; i < 4; i++) {
++ OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
++ OUT_BATCH(i << SVB_INDEX_SHIFT);
++ OUT_BATCH(0);
++ OUT_BATCH(0xffffffff);
++ }
++ }
++
++ /* 0x61020000 State Instruction Pointer */
++ {
++ OUT_BATCH(CMD_STATE_INSN_POINTER);
++ OUT_BATCH(0);
++ }
++
++
++ {
++ OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
++ (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
++ }
++ ADVANCE_BATCH();
++
++ if (intel->batch.need_workaround_flush)
++ intel->batch.need_workaround_flush = false;
++
++ } else {
+
+ {
+ /* 0x61040000 Pipeline Select */
+@@ -693,6 +777,7 @@ static void upload_invarient_state( stru
+
+ BRW_BATCH_STRUCT(brw, &vfs);
+ }
++ }
+ }
+
+ const struct brw_tracked_state brw_invarient_state = {
+--- src/mesa/drivers/dri/intel/intel_batchbuffer.c 2012-03-30 17:40:29.342162385 +0800
++++ src/mesa/drivers/dri/intel/intel_batchbuffer.c 2012-03-30 17:43:45.957231205 +0800
+@@ -83,6 +83,9 @@ intel_batchbuffer_reset(struct intel_con
+ intel->maxBatchSize, 4096);
+
+ intel->batch.reserved_space = 4096;
++ /* Gen6 only: add 14*4 to the reserved space for the HW workaround. */
++ if (intel->gen == 6)
++ intel->batch.reserved_space += 14*4;
+ intel->batch.state_batch_offset = intel->batch.bo->size;
+ intel->batch.used = 0;
+ }
+--- src/mesa/drivers/dri/i965/brw_draw_upload.c 2012-04-03 06:33:26.650545317 +0800
++++ src/mesa/drivers/dri/i965/brw_draw_upload.c 2012-04-03 06:44:34.953419019 +0800
+@@ -531,8 +531,14 @@ static void brw_emit_vertices(struct brw
+ struct intel_context *intel = intel_context(ctx);
+ GLuint i;
+
++ /* Gen6 HW workaround: flush the batch first if space is below the size computed from the buffer/element counts. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < (13 + 4*brw->vb.nr_buffers + 2* brw->vb.nr_enabled)*4)
++ intel_batchbuffer_flush(intel);
++ }
+ brw_emit_query_begin(brw);
+
++
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+
+diff -rupN Mesa-7.11.2.ori/src/mesa/drivers/dri/i915/intel_batchbuffer.c Mesa-7.11.2/src/mesa/drivers/dri/i915/intel_batchbuffer.c
+--- src/mesa/drivers/dri/i915/intel_batchbuffer.c 2012-04-05 16:56:14.558005520 +0800
++++ src/mesa/drivers/dri/i915/intel_batchbuffer.c 2012-04-06 01:28:50.914943452 +0800
+@@ -427,8 +427,10 @@ intel_batchbuffer_emit_mi_flush(struct i
+ OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
++ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TC_FLUSH |
+- PIPE_CONTROL_NO_WRITE);
++ PIPE_CONTROL_NO_WRITE |
++ PIPE_CONTROL_CS_STALL);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+diff -rupN Mesa-7.11.2.ori/src/mesa/drivers/dri/i965/brw_draw.c Mesa-7.11.2/src/mesa/drivers/dri/i965/brw_draw.c
+--- src/mesa/drivers/dri/i965/brw_draw.c 2012-04-05 16:56:14.560673892 +0800
++++ src/mesa/drivers/dri/i965/brw_draw.c 2012-04-05 17:20:05.997773534 +0800
+@@ -168,7 +168,7 @@ static void brw_emit_prim(struct brw_con
+ * and missed flushes of the render cache as it heads to other parts of
+ * the besides the draw code.
+ */
+- if (intel->always_flush_cache) {
++ if ((intel->always_flush_cache) || (intel->gen == 6)) {
+ intel_batchbuffer_emit_mi_flush(intel);
+ }
+
+@@ -185,7 +185,7 @@ static void brw_emit_prim(struct brw_con
+
+ intel->batch.need_workaround_flush = true;
+
+- if (intel->always_flush_cache) {
++ if ((intel->always_flush_cache) || (intel->gen == 6)) {
+ intel_batchbuffer_emit_mi_flush(intel);
+ }
+ }
+--- src/mesa/drivers/dri/i965/gen6_vs_state.c 2012-04-05 16:56:14.563966631 +0800
++++ src/mesa/drivers/dri/i965/gen6_vs_state.c 2012-04-05 16:59:20.840833882 +0800
+@@ -120,6 +120,12 @@ upload_vs_state(struct brw_context *brw)
+ {
+ struct intel_context *intel = &brw->intel;
+
++ /* Gen6 HW workaround: flush the batch first if less than 23*4 of space is left. */
++ if (intel->gen == 6) {
++ if (intel_batchbuffer_space(intel) < 23*4)
++ intel_batchbuffer_flush(intel);
++ }
++
+ if (brw->vs.push_const_size == 0) {
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+--- src/mesa/drivers/dri/i965/intel_batchbuffer.c 2012-04-05 16:56:14.564548246 +0800
++++ src/mesa/drivers/dri/i965/intel_batchbuffer.c 2012-04-06 01:28:50.914943452 +0800
+@@ -427,8 +427,10 @@ intel_batchbuffer_emit_mi_flush(struct i
+ OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
++ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TC_FLUSH |
+- PIPE_CONTROL_NO_WRITE);
++ PIPE_CONTROL_NO_WRITE |
++ PIPE_CONTROL_CS_STALL);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();