|
1 From ca0b97e80a558d06274e68ece667b821901acc42 Mon Sep 17 00:00:00 2001 |
|
2 From: Adam Langley <[email protected]> |
|
3 Date: Wed, 14 Sep 2016 11:50:36 -0700 |
|
4 Subject: [PATCH 21/38] [release-branch.go1.7] crypto/tls: fix deadlock when |
|
5 racing to complete handshake. |
|
6 |
|
7 After renegotiation support was added (af125a5193c) it's possible for a |
|
8 Write to block on a Read when racing to complete the handshake: |
|
9 1. The Write determines that a handshake is needed and tries to |
|
10 take the neccesary locks in the correct order. |
|
11 2. The Read also determines that a handshake is needed and wins |
|
12 the race to take the locks. |
|
13 3. The Read goroutine completes the handshake and wins a race |
|
14 to unlock and relock c.in, which it'll hold when waiting for |
|
15 more network data. |
|
16 |
|
17 If the application-level protocol requires the Write to complete before |
|
18 data can be read then the system as a whole will deadlock. |
|
19 |
|
20 Unfortunately it doesn't appear possible to reverse the locking order of |
|
21 c.in and handshakeMutex because we might read a renegotiation request at |
|
22 any point and need to be able to do a handshake without unlocking. |
|
23 |
|
24 So this change adds a sync.Cond that indicates that a goroutine has |
|
25 committed to doing a handshake. Other interested goroutines can wait on |
|
26 that Cond when needed. |
|
27 |
|
28 The test for this isn't great. I was able to reproduce the deadlock with |
|
29 it only when building with -race. (Because -race happened to alter the |
|
30 timing just enough.) |
|
31 |
|
32 Fixes #17101. |
|
33 |
|
34 Change-Id: I4e8757f7b82a84e46c9963a977d089f0fb675495 |
|
35 Reviewed-on: https://go-review.googlesource.com/29164 |
|
36 Reviewed-by: Brad Fitzpatrick <[email protected]> |
|
37 Reviewed-by: Ian Lance Taylor <[email protected]> |
|
38 Reviewed-on: https://go-review.googlesource.com/31268 |
|
39 --- |
|
40 src/crypto/tls/conn.go | 68 +++++++++++++++++++++++++-------- |
|
41 src/crypto/tls/handshake_client_test.go | 54 ++++++++++++++++++++++++++ |
|
42 2 files changed, 107 insertions(+), 15 deletions(-) |
|
43 |
|
44 diff --git a/src/crypto/tls/conn.go b/src/crypto/tls/conn.go |
|
45 index 87bef23..77fd6d3 100644 |
|
46 --- a/src/crypto/tls/conn.go |
|
47 +++ b/src/crypto/tls/conn.go |
|
48 @@ -29,10 +29,14 @@ type Conn struct { |
|
49 |
|
50 // constant after handshake; protected by handshakeMutex |
|
51 handshakeMutex sync.Mutex // handshakeMutex < in.Mutex, out.Mutex, errMutex |
|
52 - handshakeErr error // error resulting from handshake |
|
53 - vers uint16 // TLS version |
|
54 - haveVers bool // version has been negotiated |
|
55 - config *Config // configuration passed to constructor |
|
56 + // handshakeCond, if not nil, indicates that a goroutine is committed |
|
57 + // to running the handshake for this Conn. Other goroutines that need |
|
58 + // to wait for the handshake can wait on this, under handshakeMutex. |
|
59 + handshakeCond *sync.Cond |
|
60 + handshakeErr error // error resulting from handshake |
|
61 + vers uint16 // TLS version |
|
62 + haveVers bool // version has been negotiated |
|
63 + config *Config // configuration passed to constructor |
|
64 // handshakeComplete is true if the connection is currently transfering |
|
65 // application data (i.e. is not currently processing a handshake). |
|
66 handshakeComplete bool |
|
67 @@ -1206,26 +1210,50 @@ func (c *Conn) Handshake() error { |
|
68 // need to check whether a handshake is pending (such as Write) to |
|
69 // block. |
|
70 // |
|
71 - // Thus we take c.handshakeMutex first and, if we find that a handshake |
|
72 - // is needed, then we unlock, acquire c.in and c.handshakeMutex in the |
|
73 - // correct order, and check again. |
|
74 + // Thus we first take c.handshakeMutex to check whether a handshake is |
|
75 + // needed. |
|
76 + // |
|
77 + // If so then, previously, this code would unlock handshakeMutex and |
|
78 + // then lock c.in and handshakeMutex in the correct order to run the |
|
79 + // handshake. The problem was that it was possible for a Read to |
|
80 + // complete the handshake once handshakeMutex was unlocked and then |
|
81 + // keep c.in while waiting for network data. Thus a concurrent |
|
82 + // operation could be blocked on c.in. |
|
83 + // |
|
84 + // Thus handshakeCond is used to signal that a goroutine is committed |
|
85 + // to running the handshake and other goroutines can wait on it if they |
|
86 + // need. handshakeCond is protected by handshakeMutex. |
|
87 c.handshakeMutex.Lock() |
|
88 defer c.handshakeMutex.Unlock() |
|
89 |
|
90 - for i := 0; i < 2; i++ { |
|
91 - if i == 1 { |
|
92 - c.handshakeMutex.Unlock() |
|
93 - c.in.Lock() |
|
94 - defer c.in.Unlock() |
|
95 - c.handshakeMutex.Lock() |
|
96 - } |
|
97 - |
|
98 + for { |
|
99 if err := c.handshakeErr; err != nil { |
|
100 return err |
|
101 } |
|
102 if c.handshakeComplete { |
|
103 return nil |
|
104 } |
|
105 + if c.handshakeCond == nil { |
|
106 + break |
|
107 + } |
|
108 + |
|
109 + c.handshakeCond.Wait() |
|
110 + } |
|
111 + |
|
112 + // Set handshakeCond to indicate that this goroutine is committing to |
|
113 + // running the handshake. |
|
114 + c.handshakeCond = sync.NewCond(&c.handshakeMutex) |
|
115 + c.handshakeMutex.Unlock() |
|
116 + |
|
117 + c.in.Lock() |
|
118 + defer c.in.Unlock() |
|
119 + |
|
120 + c.handshakeMutex.Lock() |
|
121 + |
|
122 + // The handshake cannot have completed when handshakeMutex was unlocked |
|
123 + // because this goroutine set handshakeCond. |
|
124 + if c.handshakeErr != nil || c.handshakeComplete { |
|
125 + panic("handshake should not have been able to complete after handshakeCond was set") |
|
126 } |
|
127 |
|
128 if c.isClient { |
|
129 @@ -1236,6 +1264,16 @@ func (c *Conn) Handshake() error { |
|
130 if c.handshakeErr == nil { |
|
131 c.handshakes++ |
|
132 } |
|
133 + |
|
134 + if c.handshakeErr == nil && !c.handshakeComplete { |
|
135 + panic("handshake should have had a result.") |
|
136 + } |
|
137 + |
|
138 + // Wake any other goroutines that are waiting for this handshake to |
|
139 + // complete. |
|
140 + c.handshakeCond.Broadcast() |
|
141 + c.handshakeCond = nil |
|
142 + |
|
143 return c.handshakeErr |
|
144 } |
|
145 |
|
146 diff --git a/src/crypto/tls/handshake_client_test.go b/src/crypto/tls/handshake_client_test.go |
|
147 index ce987f1..07d31b6 100644 |
|
148 --- a/src/crypto/tls/handshake_client_test.go |
|
149 +++ b/src/crypto/tls/handshake_client_test.go |
|
150 @@ -1045,3 +1045,57 @@ func TestBuffering(t *testing.T) { |
|
151 t.Errorf("expected server handshake to complete with only two writes, but saw %d", n) |
|
152 } |
|
153 } |
|
154 + |
|
155 +func TestHandshakeRace(t *testing.T) { |
|
156 + // This test races a Read and Write to try and complete a handshake in |
|
157 + // order to provide some evidence that there are no races or deadlocks |
|
158 + // in the handshake locking. |
|
159 + for i := 0; i < 32; i++ { |
|
160 + c, s := net.Pipe() |
|
161 + |
|
162 + go func() { |
|
163 + server := Server(s, testConfig) |
|
164 + if err := server.Handshake(); err != nil { |
|
165 + panic(err) |
|
166 + } |
|
167 + |
|
168 + var request [1]byte |
|
169 + if n, err := server.Read(request[:]); err != nil || n != 1 { |
|
170 + panic(err) |
|
171 + } |
|
172 + |
|
173 + server.Write(request[:]) |
|
174 + server.Close() |
|
175 + }() |
|
176 + |
|
177 + startWrite := make(chan struct{}) |
|
178 + startRead := make(chan struct{}) |
|
179 + readDone := make(chan struct{}) |
|
180 + |
|
181 + client := Client(c, testConfig) |
|
182 + go func() { |
|
183 + <-startWrite |
|
184 + var request [1]byte |
|
185 + client.Write(request[:]) |
|
186 + }() |
|
187 + |
|
188 + go func() { |
|
189 + <-startRead |
|
190 + var reply [1]byte |
|
191 + if n, err := client.Read(reply[:]); err != nil || n != 1 { |
|
192 + panic(err) |
|
193 + } |
|
194 + c.Close() |
|
195 + readDone <- struct{}{} |
|
196 + }() |
|
197 + |
|
198 + if i&1 == 1 { |
|
199 + startWrite <- struct{}{} |
|
200 + startRead <- struct{}{} |
|
201 + } else { |
|
202 + startRead <- struct{}{} |
|
203 + startWrite <- struct{}{} |
|
204 + } |
|
205 + <-readDone |
|
206 + } |
|
207 +} |
|
208 -- |
|
209 2.7.4 |
|
210 |