7165933 running certain viewperf viewsets causes hang on Sandybridge chipsets s11u1_16
author Niveditha Rau <Niveditha.Rau@Oracle.COM>
Sun, 06 May 2012 11:15:02 -0700
changeset 1283 9ac39381548a
parent 1282 d31e0afd0923
child 1284 575cafb42372
7165933 running certain viewperf viewsets causes hang on Sandybridge chipsets
open-src/lib/mesa/Makefile
open-src/lib/mesa/fix-gen6-viewperf-hang.patch
--- a/open-src/lib/mesa/Makefile	Fri May 04 14:47:54 2012 -0700
+++ b/open-src/lib/mesa/Makefile	Sun May 06 11:15:02 2012 -0700
@@ -67,7 +67,8 @@
 	gcc-bitcount.patch,-p1 \
 	install-pc-dir.patch \
 	glu-no-mangled-exports.patch \
-	solaris-port.patch bromolow-chipset.patch
+	solaris-port.patch bromolow-chipset.patch \
+	fix-gen6-viewperf-hang.patch
 
 # File created by running configure script
 AUTOCONF_TARGET=$(SOURCE_DIR)/configs/autoconf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/open-src/lib/mesa/fix-gen6-viewperf-hang.patch	Sun May 06 11:15:02 2012 -0700
@@ -0,0 +1,325 @@
+--- src/mesa/drivers/dri/i965/gen6_wm_state.c	Mon Mar 26 14:38:15 2012
++++ src/mesa/drivers/dri/i965/gen6_wm_state.c	Mon Mar 26 14:41:19 2012
+@@ -102,15 +102,14 @@
+     /* CACHE_NEW_WM_PROG */
+    if (brw->wm.prog_data->nr_params == 0) {
+       /* Disable the push constant buffers. */
+-      BEGIN_BATCH(5);
++      BEGIN_BATCH(14);
+       OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+-      ADVANCE_BATCH();
+    } else {
+-      BEGIN_BATCH(5);
++      BEGIN_BATCH(14);
+       OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
+ 		GEN6_CONSTANT_BUFFER_0_ENABLE |
+ 		(5 - 2));
+@@ -123,7 +122,6 @@
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+-      ADVANCE_BATCH();
+    }
+ 
+    dw2 = dw4 = dw5 = dw6 = 0;
+@@ -183,7 +181,6 @@
+    dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) <<
+       GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+ 
+-   BEGIN_BATCH(9);
+    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
+    OUT_BATCH(brw->wm.prog_offset);
+    OUT_BATCH(dw2);
+--- src/mesa/drivers/dri/i965/brw_misc_state.c	2012-03-28 17:33:56.614254514 +0800
++++ src/mesa/drivers/dri/i965/brw_misc_state.c	2012-03-28 17:42:43.199231662 +0800
+@@ -728,10 +728,24 @@ static void upload_state_base_address( s
+     */
+ 
+    if (intel->gen >= 6) {
+-      if (intel->gen == 6)
+-	 intel_emit_post_sync_nonzero_flush(intel);
++      if ((intel->gen == 6) && intel->batch.need_workaround_flush) {
++
++         BEGIN_BATCH(18);
++         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++         OUT_BATCH(PIPE_CONTROL_CS_STALL |
++      	     PIPE_CONTROL_STALL_AT_SCOREBOARD);
++         OUT_BATCH(0); /* address */
++         OUT_BATCH(0); /* write data */
++
++         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++         OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
++         OUT_RELOC(intel->batch.workaround_bo,
++	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
++         OUT_BATCH(0); /* write data */
++
++      } else 
++	BEGIN_BATCH(10);
+ 
+-       BEGIN_BATCH(10);
+        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+        /* General state base address: stateless DP read/write requests */
+        OUT_BATCH(1);
+@@ -762,6 +776,8 @@ static void upload_state_base_address( s
+        OUT_BATCH(1); /* Indirect object upper bound */
+        OUT_BATCH(1); /* Instruction access upper bound */
+        ADVANCE_BATCH();
++       if (intel->batch.need_workaround_flush)
++          intel->batch.need_workaround_flush = false;	
+    } else if (intel->gen == 5) {
+        BEGIN_BATCH(8);
+        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+--- src/mesa/drivers/dri/i965/brw_draw.c	2012-03-30 17:40:30.144996415 +0800
++++ src/mesa/drivers/dri/i965/brw_draw.c	2012-03-30 17:44:54.000562204 +0800
+@@ -157,6 +157,12 @@ static void brw_emit_prim(struct brw_con
+    if (verts_per_instance == 0)
+       return;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++      if (intel_batchbuffer_space(intel) < 30*4)
++         intel_batchbuffer_flush(intel);
++   }
++
+    /* If we're set to always flush, do it before and after the primitive emit.
+     * We want to catch both missed flushes that hurt instruction/state cache
+     * and missed flushes of the render cache as it heads to other parts of
+--- src/mesa/drivers/dri/i965/brw_misc_state.c	2012-03-30 17:40:30.201906235 +0800
++++ src/mesa/drivers/dri/i965/brw_misc_state.c	2012-03-30 17:44:54.001618123 +0800
+@@ -223,6 +223,12 @@ static void emit_depthbuffer(struct brw_
+    struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
+    unsigned int len;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++      if (intel_batchbuffer_space(intel) < 31*4)
++         intel_batchbuffer_flush(intel);
++   }
++
+    /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
+     * non-pipelined state that will need the PIPE_CONTROL workaround.
+     */
+@@ -456,6 +462,12 @@ static void upload_polygon_stipple(struc
+    if (!ctx->Polygon.StippleFlag)
+       return;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++      if (intel_batchbuffer_space(intel) < 41*4)
++         intel_batchbuffer_flush(intel);
++   }
++
+    if (intel->gen == 6)
+       intel_emit_post_sync_nonzero_flush(intel);
+ 
+@@ -504,6 +516,12 @@ static void upload_polygon_stipple_offse
+    if (!ctx->Polygon.StippleFlag)
+       return;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++      if (intel_batchbuffer_space(intel) < 10*4)
++         intel_batchbuffer_flush(intel);
++   }
++
+    if (intel->gen == 6)
+       intel_emit_post_sync_nonzero_flush(intel);
+ 
+@@ -548,6 +566,12 @@ static void upload_aa_line_parameters(st
+    if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
+       return;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++      if (intel_batchbuffer_space(intel) < 11*4)
++         intel_batchbuffer_flush(intel);
++   }
++
+    if (intel->gen == 6)
+       intel_emit_post_sync_nonzero_flush(intel);
+ 
+@@ -581,6 +605,12 @@ static void upload_line_stipple(struct b
+    if (!ctx->Line.StippleFlag)
+       return;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++      if (intel_batchbuffer_space(intel) < 11*4)
++         intel_batchbuffer_flush(intel);
++   }
++
+    if (intel->gen == 6)
+       intel_emit_post_sync_nonzero_flush(intel);
+ 
+@@ -612,8 +642,62 @@ static void upload_invarient_state( stru
+    struct intel_context *intel = &brw->intel;
+ 
+    /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
+-   if (intel->gen == 6)
+-      intel_emit_post_sync_nonzero_flush(intel);
++   if (intel->gen == 6) {
++      int i;
++
++      if (intel->batch.need_workaround_flush) {
++
++         BEGIN_BATCH(33);
++         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++         OUT_BATCH(PIPE_CONTROL_CS_STALL |
++	     PIPE_CONTROL_STALL_AT_SCOREBOARD);
++         OUT_BATCH(0); /* address */
++         OUT_BATCH(0); /* write data */
++
++         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
++         OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
++         OUT_RELOC(intel->batch.workaround_bo,
++	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
++         OUT_BATCH(0); /* write data */
++      } else
++	 BEGIN_BATCH(25);
++      /* Select the 3D pipeline (as opposed to media) */
++      OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
++
++      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (1));
++      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
++                MS_NUMSAMPLES_1);
++      OUT_BATCH(0); /* positions for 4/8-sample */
++
++      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
++      OUT_BATCH(1);
++
++      if (intel->gen < 7) {
++         for (i = 0; i < 4; i++) {
++            OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
++            OUT_BATCH(i << SVB_INDEX_SHIFT);
++            OUT_BATCH(0);
++            OUT_BATCH(0xffffffff);
++         }
++      }
++
++      /* 0x61020000  State Instruction Pointer */
++      {
++         OUT_BATCH(CMD_STATE_INSN_POINTER);
++         OUT_BATCH(0);
++      }
++
++
++      {
++         OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
++            (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
++      }
++      ADVANCE_BATCH();
++
++      if (intel->batch.need_workaround_flush)
++         intel->batch.need_workaround_flush = false;
++
++   } else {
+ 
+    {
+       /* 0x61040000  Pipeline Select */
+@@ -693,6 +777,7 @@ static void upload_invarient_state( stru
+ 
+       BRW_BATCH_STRUCT(brw, &vfs);
+    }
++   }
+ }
+ 
+ const struct brw_tracked_state brw_invarient_state = {
+--- src/mesa/drivers/dri/intel/intel_batchbuffer.c	2012-03-30 17:40:29.342162385 +0800
++++ src/mesa/drivers/dri/intel/intel_batchbuffer.c	2012-03-30 17:43:45.957231205 +0800
+@@ -83,6 +83,9 @@ intel_batchbuffer_reset(struct intel_con
+ 					intel->maxBatchSize, 4096);
+ 
+    intel->batch.reserved_space = 4096;
++   /* Gen6: reserve an extra 14*4 for the HW workaround. */
++   if (intel->gen == 6)
++	intel->batch.reserved_space += 14*4; 
+    intel->batch.state_batch_offset = intel->batch.bo->size;
+    intel->batch.used = 0;
+ }
+--- src/mesa/drivers/dri/i965/brw_draw_upload.c	2012-04-03 06:33:26.650545317 +0800
++++ src/mesa/drivers/dri/i965/brw_draw_upload.c	2012-04-03 06:44:34.953419019 +0800
+@@ -531,8 +531,14 @@ static void brw_emit_vertices(struct brw
+    struct intel_context *intel = intel_context(ctx);
+    GLuint i;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++	if (intel_batchbuffer_space(intel) < (13 + 4*brw->vb.nr_buffers + 2* brw->vb.nr_enabled)*4)
++	    intel_batchbuffer_flush(intel);
++   }
+    brw_emit_query_begin(brw);
+ 
++
+    /* If the VS doesn't read any inputs (calculating vertex position from
+     * a state variable for some reason, for example), emit a single pad
+     * VERTEX_ELEMENT struct and bail.
+
+diff -rupN Mesa-7.11.2.ori/src/mesa/drivers/dri/i915/intel_batchbuffer.c Mesa-7.11.2/src/mesa/drivers/dri/i915/intel_batchbuffer.c
+--- src/mesa/drivers/dri/i915/intel_batchbuffer.c	2012-04-05 16:56:14.558005520 +0800
++++ src/mesa/drivers/dri/i915/intel_batchbuffer.c	2012-04-06 01:28:50.914943452 +0800
+@@ -427,8 +427,10 @@ intel_batchbuffer_emit_mi_flush(struct i
+ 	 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+ 		   PIPE_CONTROL_WRITE_FLUSH |
+ 		   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
++		   PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ 		   PIPE_CONTROL_TC_FLUSH |
+-		   PIPE_CONTROL_NO_WRITE);
++		   PIPE_CONTROL_NO_WRITE |
++		   PIPE_CONTROL_CS_STALL);
+ 	 OUT_BATCH(0); /* write address */
+ 	 OUT_BATCH(0); /* write data */
+ 	 ADVANCE_BATCH();
+diff -rupN Mesa-7.11.2.ori/src/mesa/drivers/dri/i965/brw_draw.c Mesa-7.11.2/src/mesa/drivers/dri/i965/brw_draw.c
+--- src/mesa/drivers/dri/i965/brw_draw.c	2012-04-05 16:56:14.560673892 +0800
++++ src/mesa/drivers/dri/i965/brw_draw.c	2012-04-05 17:20:05.997773534 +0800
+@@ -168,7 +168,7 @@ static void brw_emit_prim(struct brw_con
+     * and missed flushes of the render cache as it heads to other parts of
+     * the besides the draw code.
+     */
+-   if (intel->always_flush_cache) {
++   if ((intel->always_flush_cache) || (intel->gen == 6)) {
+       intel_batchbuffer_emit_mi_flush(intel);
+    }
+ 
+@@ -185,7 +185,7 @@ static void brw_emit_prim(struct brw_con
+ 
+    intel->batch.need_workaround_flush = true;
+ 
+-   if (intel->always_flush_cache) {
++   if ((intel->always_flush_cache) || (intel->gen == 6)) {
+       intel_batchbuffer_emit_mi_flush(intel);
+    }
+ }
+--- src/mesa/drivers/dri/i965/gen6_vs_state.c	2012-04-05 16:56:14.563966631 +0800
++++ src/mesa/drivers/dri/i965/gen6_vs_state.c	2012-04-05 16:59:20.840833882 +0800
+@@ -120,6 +120,12 @@ upload_vs_state(struct brw_context *brw)
+ {
+    struct intel_context *intel = &brw->intel;
+ 
++   /* Gen6: flush first if the batch lacks room for the HW workaround. */
++   if (intel->gen == 6) {
++       if (intel_batchbuffer_space(intel) < 23*4)
++           intel_batchbuffer_flush(intel);
++   }
++
+    if (brw->vs.push_const_size == 0) {
+       /* Disable the push constant buffers. */
+       BEGIN_BATCH(5);
+--- src/mesa/drivers/dri/i965/intel_batchbuffer.c	2012-04-05 16:56:14.564548246 +0800
++++ src/mesa/drivers/dri/i965/intel_batchbuffer.c	2012-04-06 01:28:50.914943452 +0800
+@@ -427,8 +427,10 @@ intel_batchbuffer_emit_mi_flush(struct i
+ 	 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+ 		   PIPE_CONTROL_WRITE_FLUSH |
+ 		   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
++		   PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ 		   PIPE_CONTROL_TC_FLUSH |
+-		   PIPE_CONTROL_NO_WRITE);
++		   PIPE_CONTROL_NO_WRITE |
++		   PIPE_CONTROL_CS_STALL);
+ 	 OUT_BATCH(0); /* write address */
+ 	 OUT_BATCH(0); /* write data */
+ 	 ADVANCE_BATCH();