components/golang-17/patches/0021-release-branch.go1.7-crypto-tls-fix-deadlock-when-ra.patch
changeset 7518 c388d4e1d3ad
equal deleted inserted replaced
7517:42ae3923b8fe 7518:c388d4e1d3ad
       
     1 From ca0b97e80a558d06274e68ece667b821901acc42 Mon Sep 17 00:00:00 2001
       
     2 From: Adam Langley <[email protected]>
       
     3 Date: Wed, 14 Sep 2016 11:50:36 -0700
       
     4 Subject: [PATCH 21/38] [release-branch.go1.7] crypto/tls: fix deadlock when
       
     5  racing to complete handshake.
       
     6 
       
     7 After renegotiation support was added (af125a5193c) it's possible for a
       
     8 Write to block on a Read when racing to complete the handshake:
       
     9    1. The Write determines that a handshake is needed and tries to
       
    10       take the neccesary locks in the correct order.
       
    11    2. The Read also determines that a handshake is needed and wins
       
    12       the race to take the locks.
       
    13    3. The Read goroutine completes the handshake and wins a race
       
    14       to unlock and relock c.in, which it'll hold when waiting for
       
    15       more network data.
       
    16 
       
    17 If the application-level protocol requires the Write to complete before
       
    18 data can be read then the system as a whole will deadlock.
       
    19 
       
    20 Unfortunately it doesn't appear possible to reverse the locking order of
       
    21 c.in and handshakeMutex because we might read a renegotiation request at
       
    22 any point and need to be able to do a handshake without unlocking.
       
    23 
       
    24 So this change adds a sync.Cond that indicates that a goroutine has
       
    25 committed to doing a handshake. Other interested goroutines can wait on
       
    26 that Cond when needed.
       
    27 
       
    28 The test for this isn't great. I was able to reproduce the deadlock with
       
    29 it only when building with -race. (Because -race happened to alter the
       
    30 timing just enough.)
       
    31 
       
    32 Fixes #17101.
       
    33 
       
    34 Change-Id: I4e8757f7b82a84e46c9963a977d089f0fb675495
       
    35 Reviewed-on: https://go-review.googlesource.com/29164
       
    36 Reviewed-by: Brad Fitzpatrick <[email protected]>
       
    37 Reviewed-by: Ian Lance Taylor <[email protected]>
       
    38 Reviewed-on: https://go-review.googlesource.com/31268
       
    39 ---
       
    40  src/crypto/tls/conn.go                  | 68 +++++++++++++++++++++++++--------
       
    41  src/crypto/tls/handshake_client_test.go | 54 ++++++++++++++++++++++++++
       
    42  2 files changed, 107 insertions(+), 15 deletions(-)
       
    43 
       
    44 diff --git a/src/crypto/tls/conn.go b/src/crypto/tls/conn.go
       
    45 index 87bef23..77fd6d3 100644
       
    46 --- a/src/crypto/tls/conn.go
       
    47 +++ b/src/crypto/tls/conn.go
       
    48 @@ -29,10 +29,14 @@ type Conn struct {
       
    49  
       
    50  	// constant after handshake; protected by handshakeMutex
       
    51  	handshakeMutex sync.Mutex // handshakeMutex < in.Mutex, out.Mutex, errMutex
       
    52 -	handshakeErr   error      // error resulting from handshake
       
    53 -	vers           uint16     // TLS version
       
    54 -	haveVers       bool       // version has been negotiated
       
    55 -	config         *Config    // configuration passed to constructor
       
    56 +	// handshakeCond, if not nil, indicates that a goroutine is committed
       
    57 +	// to running the handshake for this Conn. Other goroutines that need
       
    58 +	// to wait for the handshake can wait on this, under handshakeMutex.
       
    59 +	handshakeCond *sync.Cond
       
    60 +	handshakeErr  error   // error resulting from handshake
       
    61 +	vers          uint16  // TLS version
       
    62 +	haveVers      bool    // version has been negotiated
       
    63 +	config        *Config // configuration passed to constructor
       
    64  	// handshakeComplete is true if the connection is currently transfering
       
    65  	// application data (i.e. is not currently processing a handshake).
       
    66  	handshakeComplete bool
       
    67 @@ -1206,26 +1210,50 @@ func (c *Conn) Handshake() error {
       
    68  	// need to check whether a handshake is pending (such as Write) to
       
    69  	// block.
       
    70  	//
       
    71 -	// Thus we take c.handshakeMutex first and, if we find that a handshake
       
    72 -	// is needed, then we unlock, acquire c.in and c.handshakeMutex in the
       
    73 -	// correct order, and check again.
       
    74 +	// Thus we first take c.handshakeMutex to check whether a handshake is
       
    75 +	// needed.
       
    76 +	//
       
    77 +	// If so then, previously, this code would unlock handshakeMutex and
       
    78 +	// then lock c.in and handshakeMutex in the correct order to run the
       
    79 +	// handshake. The problem was that it was possible for a Read to
       
    80 +	// complete the handshake once handshakeMutex was unlocked and then
       
    81 +	// keep c.in while waiting for network data. Thus a concurrent
       
    82 +	// operation could be blocked on c.in.
       
    83 +	//
       
    84 +	// Thus handshakeCond is used to signal that a goroutine is committed
       
    85 +	// to running the handshake and other goroutines can wait on it if they
       
    86 +	// need. handshakeCond is protected by handshakeMutex.
       
    87  	c.handshakeMutex.Lock()
       
    88  	defer c.handshakeMutex.Unlock()
       
    89  
       
    90 -	for i := 0; i < 2; i++ {
       
    91 -		if i == 1 {
       
    92 -			c.handshakeMutex.Unlock()
       
    93 -			c.in.Lock()
       
    94 -			defer c.in.Unlock()
       
    95 -			c.handshakeMutex.Lock()
       
    96 -		}
       
    97 -
       
    98 +	for {
       
    99  		if err := c.handshakeErr; err != nil {
       
   100  			return err
       
   101  		}
       
   102  		if c.handshakeComplete {
       
   103  			return nil
       
   104  		}
       
   105 +		if c.handshakeCond == nil {
       
   106 +			break
       
   107 +		}
       
   108 +
       
   109 +		c.handshakeCond.Wait()
       
   110 +	}
       
   111 +
       
   112 +	// Set handshakeCond to indicate that this goroutine is committing to
       
   113 +	// running the handshake.
       
   114 +	c.handshakeCond = sync.NewCond(&c.handshakeMutex)
       
   115 +	c.handshakeMutex.Unlock()
       
   116 +
       
   117 +	c.in.Lock()
       
   118 +	defer c.in.Unlock()
       
   119 +
       
   120 +	c.handshakeMutex.Lock()
       
   121 +
       
   122 +	// The handshake cannot have completed when handshakeMutex was unlocked
       
   123 +	// because this goroutine set handshakeCond.
       
   124 +	if c.handshakeErr != nil || c.handshakeComplete {
       
   125 +		panic("handshake should not have been able to complete after handshakeCond was set")
       
   126  	}
       
   127  
       
   128  	if c.isClient {
       
   129 @@ -1236,6 +1264,16 @@ func (c *Conn) Handshake() error {
       
   130  	if c.handshakeErr == nil {
       
   131  		c.handshakes++
       
   132  	}
       
   133 +
       
   134 +	if c.handshakeErr == nil && !c.handshakeComplete {
       
   135 +		panic("handshake should have had a result.")
       
   136 +	}
       
   137 +
       
   138 +	// Wake any other goroutines that are waiting for this handshake to
       
   139 +	// complete.
       
   140 +	c.handshakeCond.Broadcast()
       
   141 +	c.handshakeCond = nil
       
   142 +
       
   143  	return c.handshakeErr
       
   144  }
       
   145  
       
   146 diff --git a/src/crypto/tls/handshake_client_test.go b/src/crypto/tls/handshake_client_test.go
       
   147 index ce987f1..07d31b6 100644
       
   148 --- a/src/crypto/tls/handshake_client_test.go
       
   149 +++ b/src/crypto/tls/handshake_client_test.go
       
   150 @@ -1045,3 +1045,57 @@ func TestBuffering(t *testing.T) {
       
   151  		t.Errorf("expected server handshake to complete with only two writes, but saw %d", n)
       
   152  	}
       
   153  }
       
   154 +
       
   155 +func TestHandshakeRace(t *testing.T) {
       
   156 +	// This test races a Read and Write to try and complete a handshake in
       
   157 +	// order to provide some evidence that there are no races or deadlocks
       
   158 +	// in the handshake locking.
       
   159 +	for i := 0; i < 32; i++ {
       
   160 +		c, s := net.Pipe()
       
   161 +
       
   162 +		go func() {
       
   163 +			server := Server(s, testConfig)
       
   164 +			if err := server.Handshake(); err != nil {
       
   165 +				panic(err)
       
   166 +			}
       
   167 +
       
   168 +			var request [1]byte
       
   169 +			if n, err := server.Read(request[:]); err != nil || n != 1 {
       
   170 +				panic(err)
       
   171 +			}
       
   172 +
       
   173 +			server.Write(request[:])
       
   174 +			server.Close()
       
   175 +		}()
       
   176 +
       
   177 +		startWrite := make(chan struct{})
       
   178 +		startRead := make(chan struct{})
       
   179 +		readDone := make(chan struct{})
       
   180 +
       
   181 +		client := Client(c, testConfig)
       
   182 +		go func() {
       
   183 +			<-startWrite
       
   184 +			var request [1]byte
       
   185 +			client.Write(request[:])
       
   186 +		}()
       
   187 +
       
   188 +		go func() {
       
   189 +			<-startRead
       
   190 +			var reply [1]byte
       
   191 +			if n, err := client.Read(reply[:]); err != nil || n != 1 {
       
   192 +				panic(err)
       
   193 +			}
       
   194 +			c.Close()
       
   195 +			readDone <- struct{}{}
       
   196 +		}()
       
   197 +
       
   198 +		if i&1 == 1 {
       
   199 +			startWrite <- struct{}{}
       
   200 +			startRead <- struct{}{}
       
   201 +		} else {
       
   202 +			startRead <- struct{}{}
       
   203 +			startWrite <- struct{}{}
       
   204 +		}
       
   205 +		<-readDone
       
   206 +	}
       
   207 +}
       
   208 -- 
       
   209 2.7.4
       
   210