author | masputra |
Sat, 22 Oct 2005 22:50:14 -0700 | |
changeset 741 | 40027a3621ac |
parent 409 | 22012dc8ea5b |
child 1503 | 9c3595b79c0d |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
5 |
* Common Development and Distribution License, Version 1.0 only |
|
6 |
* (the "License"). You may not use this file except in compliance |
|
7 |
* with the License. |
|
8 |
* |
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 |
* or http://www.opensolaris.org/os/licensing. |
|
11 |
* See the License for the specific language governing permissions |
|
12 |
* and limitations under the License. |
|
13 |
* |
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 |
* If applicable, add the following below this CDDL HEADER, with the |
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 |
* |
|
20 |
* CDDL HEADER END |
|
21 |
*/ |
|
22 |
/* |
|
153
b7f7b242faa2
5013200 ipclassifier bind list insertion order is flawed in some cases
ethindra
parents:
0
diff
changeset
|
23 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
0 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
const char ipclassifier_version[] = "@(#)ipclassifier.c 1.6 04/03/31 SMI"; |
|
30 |
||
31 |
/* |
|
32 |
* IP PACKET CLASSIFIER |
|
33 |
* |
|
34 |
* The IP packet classifier provides mapping between IP packets and persistent |
|
35 |
* connection state for connection-oriented protocols. It also provides |
|
36 |
* interface for managing connection states. |
|
37 |
* |
|
38 |
* The connection state is kept in conn_t data structure and contains, among |
|
39 |
* other things: |
|
40 |
* |
|
41 |
* o local/remote address and ports |
|
42 |
* o Transport protocol |
|
43 |
* o squeue for the connection (for TCP only) |
|
44 |
* o reference counter |
|
45 |
* o Connection state |
|
46 |
* o hash table linkage |
|
47 |
* o interface/ire information |
|
48 |
* o credentials |
|
49 |
* o ipsec policy |
|
50 |
* o send and receive functions. |
|
51 |
* o mutex lock. |
|
52 |
* |
|
53 |
* Connections use a reference counting scheme. They are freed when the |
|
54 |
* reference counter drops to zero. A reference is incremented when connection |
|
55 |
* is placed in a list or table, when incoming packet for the connection arrives |
|
56 |
* and when connection is processed via squeue (squeue processing may be |
|
57 |
* asynchronous and the reference protects the connection from being destroyed |
|
58 |
* before its processing is finished). |
|
59 |
* |
|
60 |
* send and receive functions are currently used for TCP only. The send function |
|
61 |
* determines the IP entry point for the packet once it leaves TCP to be sent to |
|
62 |
* the destination address. The receive function is used by IP when the packet |
|
63 |
* should be passed for TCP processing. When a new connection is created these |
|
64 |
* are set to ip_output() and tcp_input() respectively. During the lifetime of |
|
65 |
* the connection the send and receive functions may change depending on the |
|
66 |
* changes in the connection state. For example, once the connection is bound to |
|
67 |
* an address, the receive function for this connection is set to |
|
68 |
* tcp_conn_request(). This allows incoming SYNs to go directly into the |
|
69 |
* listener SYN processing function without going to tcp_input() first. |
|
70 |
* |
|
71 |
* Classifier uses several hash tables: |
|
72 |
* |
|
73 |
* ipcl_conn_fanout: contains all TCP connections in CONNECTED state |
|
74 |
* ipcl_bind_fanout: contains all connections in BOUND state |
|
75 |
* ipcl_proto_fanout: IPv4 protocol fanout |
|
76 |
* ipcl_proto_fanout_v6: IPv6 protocol fanout |
|
77 |
* ipcl_udp_fanout: contains all UDP connections |
|
78 |
* ipcl_globalhash_fanout: contains all connections |
|
79 |
* |
|
80 |
* The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) |
|
81 |
* which need to view all existing connections. |
|
82 |
* |
|
83 |
* All tables are protected by per-bucket locks. When both per-bucket lock and |
|
84 |
* connection lock need to be held, the per-bucket lock should be acquired |
|
85 |
* first, followed by the connection lock. |
|
86 |
* |
|
87 |
* All functions doing search in one of these tables increment a reference |
|
88 |
* counter on the connection found (if any). This reference should be dropped |
|
89 |
* when the caller has finished processing the connection. |
|
90 |
* |
|
91 |
* |
|
92 |
* INTERFACES: |
|
93 |
* =========== |
|
94 |
* |
|
95 |
* Connection Lookup: |
|
96 |
* ------------------ |
|
97 |
* |
|
98 |
* conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid) |
|
99 |
* conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid) |
|
100 |
* |
|
101 |
* Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if |
|
102 |
* it can't find any associated connection. If the connection is found, its |
|
103 |
* reference counter is incremented. |
|
104 |
* |
|
105 |
* mp: mblock, containing packet header. The full header should fit |
|
106 |
* into a single mblock. It should also contain at least full IP |
|
107 |
* and TCP or UDP header. |
|
108 |
* |
|
109 |
* protocol: Either IPPROTO_TCP or IPPROTO_UDP. |
|
110 |
* |
|
111 |
* hdr_len: The size of IP header. It is used to find TCP or UDP header in |
|
112 |
* the packet. |
|
113 |
* |
|
114 |
* zoneid: The zone in which the returned connection must be. |
|
115 |
* |
|
116 |
* For TCP connections, the lookup order is as follows: |
|
117 |
* 5-tuple {src, dst, protocol, local port, remote port} |
|
118 |
* lookup in ipcl_conn_fanout table. |
|
119 |
* 3-tuple {dst, remote port, protocol} lookup in |
|
120 |
* ipcl_bind_fanout table. |
|
121 |
* |
|
122 |
* For UDP connections, a 5-tuple {src, dst, protocol, local port, |
|
123 |
* remote port} lookup is done on ipcl_udp_fanout. Note that, |
|
124 |
* these interfaces do not handle cases where a packet belongs |
|
125 |
* to multiple UDP clients, which is handled in IP itself. |
|
126 |
* |
|
127 |
* conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int); |
|
128 |
* conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t); |
|
129 |
* |
|
130 |
* Lookup routine to find an exact match for {src, dst, local port, |
|
131 |
* remote port} for TCP connections in ipcl_conn_fanout. The address and |
|
132 |
* ports are read from the IP and TCP header respectively. |
|
133 |
* |
|
134 |
* conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol); |
|
135 |
* conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex); |
|
136 |
* |
|
137 |
* Lookup routine to find a listener with the tuple {lport, laddr, |
|
138 |
* protocol} in the ipcl_bind_fanout table. For IPv6, an additional |
|
139 |
* parameter interface index is also compared. |
|
140 |
* |
|
141 |
* void ipcl_walk(func, arg) |
|
142 |
* |
|
143 |
* Apply 'func' to every connection available. The 'func' is called as |
|
144 |
* (*func)(connp, arg). The walk is non-atomic so connections may be |
|
145 |
* created and destroyed during the walk. The CONN_CONDEMNED and |
|
146 |
* CONN_INCIPIENT flags ensure that connections which are newly created |
|
147 |
* or being destroyed are not selected by the walker. |
|
148 |
* |
|
149 |
* Table Updates |
|
150 |
* ------------- |
|
151 |
* |
|
152 |
* int ipcl_conn_insert(connp, protocol, src, dst, ports) |
|
153 |
* int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex) |
|
154 |
* |
|
155 |
* Insert 'connp' in the ipcl_conn_fanout. |
|
156 |
* Arguments : |
|
157 |
* connp conn_t to be inserted |
|
158 |
* protocol connection protocol |
|
159 |
* src source address |
|
160 |
* dst destination address |
|
161 |
* ports local and remote port |
|
162 |
* ifindex interface index for IPv6 connections |
|
163 |
* |
|
164 |
* Return value : |
|
165 |
* 0 if connp was inserted |
|
166 |
* EADDRINUSE if the connection with the same tuple |
|
167 |
* already exists. |
|
168 |
* |
|
169 |
* int ipcl_bind_insert(connp, protocol, src, lport); |
|
170 |
* int ipcl_bind_insert_v6(connp, protocol, src, lport); |
|
171 |
* |
|
172 |
* Insert 'connp' in ipcl_bind_fanout. |
|
173 |
* Arguments : |
|
174 |
* connp conn_t to be inserted |
|
175 |
* protocol connection protocol |
|
176 |
* src source address connection wants |
|
177 |
* to bind to |
|
178 |
* lport local port connection wants to |
|
179 |
* bind to |
|
180 |
* |
|
181 |
* |
|
182 |
* void ipcl_hash_remove(connp); |
|
183 |
* |
|
184 |
* Removes the 'connp' from the connection fanout table. |
|
185 |
* |
|
186 |
* Connection Creation/Destruction |
|
187 |
* ------------------------------- |
|
188 |
* |
|
189 |
* conn_t *ipcl_conn_create(type, sleep) |
|
190 |
* |
|
191 |
* Creates a new conn based on the type flag, inserts it into |
|
192 |
* globalhash table. |
|
193 |
* |
|
194 |
* type: This flag determines the type of conn_t which needs to be |
|
195 |
* created. |
|
196 |
* IPCL_TCPCONN indicates a TCP connection |
|
197 |
* IPCL_IPCCONN indicates all non-TCP connections. |
|
198 |
* |
|
199 |
* void ipcl_conn_destroy(connp) |
|
200 |
* |
|
201 |
* Destroys the connection state, removes it from the global |
|
202 |
* connection hash table and frees its memory. |
|
203 |
*/ |
|
204 |
||
205 |
#include <sys/types.h> |
|
206 |
#include <sys/stream.h> |
|
207 |
#include <sys/dlpi.h> |
|
208 |
#include <sys/stropts.h> |
|
209 |
#include <sys/sysmacros.h> |
|
210 |
#include <sys/strsubr.h> |
|
211 |
#include <sys/strlog.h> |
|
212 |
#include <sys/strsun.h> |
|
213 |
#define _SUN_TPI_VERSION 2 |
|
214 |
#include <sys/ddi.h> |
|
215 |
#include <sys/cmn_err.h> |
|
216 |
#include <sys/debug.h> |
|
217 |
||
218 |
#include <sys/systm.h> |
|
219 |
#include <sys/param.h> |
|
220 |
#include <sys/kmem.h> |
|
221 |
#include <sys/isa_defs.h> |
|
222 |
#include <inet/common.h> |
|
223 |
#include <netinet/ip6.h> |
|
224 |
#include <netinet/icmp6.h> |
|
225 |
||
226 |
#include <inet/ip.h> |
|
227 |
#include <inet/ip6.h> |
|
228 |
#include <inet/tcp.h> |
|
229 |
#include <inet/tcp_trace.h> |
|
230 |
#include <inet/ip_multi.h> |
|
231 |
#include <inet/ip_if.h> |
|
232 |
#include <inet/ip_ire.h> |
|
233 |
#include <inet/ip_rts.h> |
|
234 |
#include <inet/optcom.h> |
|
235 |
#include <inet/ip_ndp.h> |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
236 |
#include <inet/udp_impl.h> |
0 | 237 |
#include <inet/sctp_ip.h> |
238 |
||
239 |
#include <sys/ethernet.h> |
|
240 |
#include <net/if_types.h> |
|
241 |
#include <sys/cpuvar.h> |
|
242 |
||
243 |
#include <inet/mi.h> |
|
244 |
#include <inet/ipclassifier.h> |
|
245 |
#include <inet/ipsec_impl.h> |
|
246 |
||
247 |
#ifdef DEBUG |
|
248 |
#define IPCL_DEBUG |
|
249 |
#else |
|
250 |
#undef IPCL_DEBUG |
|
251 |
#endif |
|
252 |
||
253 |
#ifdef IPCL_DEBUG |
|
254 |
int ipcl_debug_level = 0; |
|
255 |
#define IPCL_DEBUG_LVL(level, args) \ |
|
256 |
if (ipcl_debug_level & level) { printf args; } |
|
257 |
#else |
|
258 |
#define IPCL_DEBUG_LVL(level, args) {; } |
|
259 |
#endif |
|
260 |
connf_t *ipcl_conn_fanout; |
|
261 |
connf_t *ipcl_bind_fanout; |
|
262 |
connf_t ipcl_proto_fanout[IPPROTO_MAX + 1]; |
|
263 |
connf_t ipcl_proto_fanout_v6[IPPROTO_MAX + 1]; |
|
264 |
connf_t *ipcl_udp_fanout; |
|
265 |
||
266 |
/* A separate hash list for raw socket. */ |
|
267 |
connf_t *ipcl_raw_fanout; |
|
268 |
||
269 |
connf_t rts_clients; |
|
270 |
||
271 |
/* Old value for compatibility */ |
|
272 |
uint_t tcp_conn_hash_size = 0; |
|
273 |
||
274 |
/* New value. Zero means choose automatically. */ |
|
275 |
uint_t ipcl_conn_hash_size = 0; |
|
276 |
uint_t ipcl_conn_hash_memfactor = 8192; |
|
277 |
uint_t ipcl_conn_hash_maxsize = 82500; |
|
278 |
||
279 |
uint_t ipcl_conn_fanout_size = 0; |
|
280 |
||
281 |
||
282 |
/* bind/udp fanout table size */ |
|
283 |
uint_t ipcl_bind_fanout_size = 512; |
|
284 |
uint_t ipcl_udp_fanout_size = 256; |
|
285 |
||
286 |
/* Raw socket fanout size. Must be a power of 2. */ |
|
287 |
uint_t ipcl_raw_fanout_size = 256; |
|
288 |
||
289 |
/* |
|
290 |
* Power of 2^N Primes useful for hashing for N of 0-28, |
|
291 |
* these primes are the nearest prime <= 2^N - 2^(N-2). |
|
292 |
*/ |
|
293 |
||
294 |
#define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ |
|
295 |
6143, 12281, 24571, 49139, 98299, 196597, 393209, \ |
|
296 |
786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ |
|
297 |
50331599, 100663291, 201326557, 0} |
|
298 |
||
299 |
/* |
|
300 |
* wrapper structure to ensure that conn+tcpb are aligned |
|
301 |
* on cache lines. |
|
302 |
*/ |
|
303 |
typedef struct itc_s { |
|
304 |
union { |
|
305 |
conn_t itcu_conn; |
|
306 |
char itcu_filler[CACHE_ALIGN(conn_s)]; |
|
307 |
} itc_u; |
|
308 |
tcp_t itc_tcp; |
|
309 |
} itc_t; |
|
310 |
||
311 |
#define itc_conn itc_u.itcu_conn |
|
312 |
||
313 |
struct kmem_cache *ipcl_tcpconn_cache; |
|
314 |
struct kmem_cache *ipcl_tcp_cache; |
|
315 |
struct kmem_cache *ipcl_conn_cache; |
|
316 |
extern struct kmem_cache *sctp_conn_cache; |
|
317 |
extern struct kmem_cache *tcp_sack_info_cache; |
|
318 |
extern struct kmem_cache *tcp_iphc_cache; |
|
319 |
||
320 |
extern void tcp_timermp_free(tcp_t *); |
|
321 |
extern mblk_t *tcp_timermp_alloc(int); |
|
322 |
||
323 |
static int ipcl_tcpconn_constructor(void *, void *, int); |
|
324 |
static void ipcl_tcpconn_destructor(void *, void *); |
|
325 |
||
326 |
static int conn_g_index; |
|
327 |
connf_t *ipcl_globalhash_fanout; |
|
328 |
||
329 |
#ifdef IPCL_DEBUG |
|
330 |
#define INET_NTOA_BUFSIZE 18 |
|
331 |
||
332 |
/*
 * Format the IPv4 address 'in' as dotted-decimal text into 'b' and return
 * 'b'.  The four octets are printed in memory order, so the result is the
 * conventional notation when 'in' holds the address in network byte order.
 * 'b' must have room for the longest result, "255.255.255.255" plus the
 * terminating NUL (16 bytes).
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	unsigned char	*p;

	/* View the address as raw bytes; one octet per printed field. */
	p = (unsigned char *)&in;
	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
	return (b);
}
|
341 |
#endif |
|
342 |
||
343 |
/* |
|
344 |
* ipclassifier intialization routine, sets up hash tables and |
|
345 |
* conn caches. |
|
346 |
*/ |
|
347 |
void |
|
348 |
ipcl_init(void) |
|
349 |
{ |
|
350 |
int i; |
|
351 |
int sizes[] = P2Ps(); |
|
352 |
||
353 |
ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", |
|
354 |
sizeof (conn_t), CACHE_ALIGN_SIZE, |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
355 |
NULL, NULL, NULL, NULL, NULL, 0); |
0 | 356 |
|
357 |
ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", |
|
358 |
sizeof (itc_t), CACHE_ALIGN_SIZE, |
|
359 |
ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, |
|
360 |
NULL, NULL, NULL, 0); |
|
361 |
||
362 |
/* |
|
363 |
* Calculate size of conn fanout table. |
|
364 |
*/ |
|
365 |
if (ipcl_conn_hash_size != 0) { |
|
366 |
ipcl_conn_fanout_size = ipcl_conn_hash_size; |
|
367 |
} else if (tcp_conn_hash_size != 0) { |
|
368 |
ipcl_conn_fanout_size = tcp_conn_hash_size; |
|
369 |
} else { |
|
370 |
extern pgcnt_t freemem; |
|
371 |
||
372 |
ipcl_conn_fanout_size = |
|
373 |
(freemem * PAGESIZE) / ipcl_conn_hash_memfactor; |
|
374 |
||
375 |
if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) |
|
376 |
ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; |
|
377 |
} |
|
378 |
||
379 |
for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { |
|
380 |
if (sizes[i] >= ipcl_conn_fanout_size) { |
|
381 |
break; |
|
382 |
} |
|
383 |
} |
|
384 |
if ((ipcl_conn_fanout_size = sizes[i]) == 0) { |
|
385 |
/* Out of range, use the 2^16 value */ |
|
386 |
ipcl_conn_fanout_size = sizes[16]; |
|
387 |
} |
|
388 |
ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size * |
|
389 |
sizeof (*ipcl_conn_fanout), KM_SLEEP); |
|
390 |
||
391 |
for (i = 0; i < ipcl_conn_fanout_size; i++) { |
|
392 |
mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, |
|
393 |
MUTEX_DEFAULT, NULL); |
|
394 |
} |
|
395 |
||
396 |
ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * |
|
397 |
sizeof (*ipcl_bind_fanout), KM_SLEEP); |
|
398 |
||
399 |
for (i = 0; i < ipcl_bind_fanout_size; i++) { |
|
400 |
mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, |
|
401 |
MUTEX_DEFAULT, NULL); |
|
402 |
} |
|
403 |
||
404 |
for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { |
|
405 |
mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, |
|
406 |
MUTEX_DEFAULT, NULL); |
|
407 |
} |
|
408 |
for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { |
|
409 |
mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, |
|
410 |
MUTEX_DEFAULT, NULL); |
|
411 |
} |
|
412 |
||
413 |
mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); |
|
414 |
||
415 |
ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * |
|
416 |
sizeof (*ipcl_udp_fanout), KM_SLEEP); |
|
417 |
||
418 |
for (i = 0; i < ipcl_udp_fanout_size; i++) { |
|
419 |
mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, |
|
420 |
MUTEX_DEFAULT, NULL); |
|
421 |
} |
|
422 |
||
423 |
ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * |
|
424 |
sizeof (*ipcl_raw_fanout), KM_SLEEP); |
|
425 |
||
426 |
for (i = 0; i < ipcl_raw_fanout_size; i++) { |
|
427 |
mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, |
|
428 |
MUTEX_DEFAULT, NULL); |
|
429 |
} |
|
430 |
||
431 |
ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * |
|
432 |
CONN_G_HASH_SIZE, KM_SLEEP); |
|
433 |
||
434 |
for (i = 0; i < CONN_G_HASH_SIZE; i++) { |
|
435 |
mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, |
|
436 |
MUTEX_DEFAULT, NULL); |
|
437 |
} |
|
438 |
} |
|
439 |
||
440 |
void |
|
441 |
ipcl_destroy(void) |
|
442 |
{ |
|
443 |
int i; |
|
444 |
kmem_cache_destroy(ipcl_conn_cache); |
|
445 |
kmem_cache_destroy(ipcl_tcpconn_cache); |
|
446 |
for (i = 0; i < ipcl_conn_fanout_size; i++) |
|
447 |
mutex_destroy(&ipcl_conn_fanout[i].connf_lock); |
|
448 |
kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * |
|
449 |
sizeof (*ipcl_conn_fanout)); |
|
450 |
for (i = 0; i < ipcl_bind_fanout_size; i++) |
|
451 |
mutex_destroy(&ipcl_bind_fanout[i].connf_lock); |
|
452 |
kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * |
|
453 |
sizeof (*ipcl_bind_fanout)); |
|
454 |
||
455 |
for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) |
|
456 |
mutex_destroy(&ipcl_proto_fanout[i].connf_lock); |
|
457 |
for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) |
|
458 |
mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); |
|
459 |
||
460 |
for (i = 0; i < ipcl_udp_fanout_size; i++) |
|
461 |
mutex_destroy(&ipcl_udp_fanout[i].connf_lock); |
|
462 |
kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * |
|
463 |
sizeof (*ipcl_udp_fanout)); |
|
464 |
||
465 |
for (i = 0; i < ipcl_raw_fanout_size; i++) |
|
466 |
mutex_destroy(&ipcl_raw_fanout[i].connf_lock); |
|
467 |
kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * |
|
468 |
sizeof (*ipcl_raw_fanout)); |
|
469 |
||
470 |
kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); |
|
471 |
mutex_destroy(&rts_clients.connf_lock); |
|
472 |
} |
|
473 |
||
474 |
/* |
|
475 |
* conn creation routine. initialize the conn, sets the reference |
|
476 |
* and inserts it in the global hash table. |
|
477 |
*/ |
|
478 |
conn_t * |
|
479 |
ipcl_conn_create(uint32_t type, int sleep) |
|
480 |
{ |
|
481 |
itc_t *itc; |
|
482 |
conn_t *connp; |
|
483 |
||
484 |
switch (type) { |
|
485 |
case IPCL_TCPCONN: |
|
486 |
if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, |
|
487 |
sleep)) == NULL) |
|
488 |
return (NULL); |
|
489 |
connp = &itc->itc_conn; |
|
490 |
connp->conn_ref = 1; |
|
491 |
IPCL_DEBUG_LVL(1, |
|
492 |
("ipcl_conn_create: connp = %p tcp (%p)", |
|
493 |
(void *)connp, (void *)connp->conn_tcp)); |
|
494 |
ipcl_globalhash_insert(connp); |
|
495 |
break; |
|
496 |
case IPCL_SCTPCONN: |
|
497 |
if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) |
|
498 |
return (NULL); |
|
499 |
connp->conn_flags = IPCL_SCTPCONN; |
|
500 |
break; |
|
501 |
case IPCL_IPCCONN: |
|
502 |
connp = kmem_cache_alloc(ipcl_conn_cache, sleep); |
|
503 |
if (connp == NULL) |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
504 |
return (NULL); |
0 | 505 |
bzero(connp, sizeof (conn_t)); |
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
506 |
mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); |
0 | 507 |
cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); |
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
508 |
connp->conn_flags = IPCL_IPCCONN; |
0 | 509 |
connp->conn_ref = 1; |
510 |
IPCL_DEBUG_LVL(1, |
|
511 |
("ipcl_conn_create: connp = %p\n", (void *)connp)); |
|
512 |
ipcl_globalhash_insert(connp); |
|
513 |
break; |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
514 |
default: |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
515 |
connp = NULL; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
516 |
ASSERT(0); |
0 | 517 |
} |
518 |
||
519 |
return (connp); |
|
520 |
} |
|
521 |
||
522 |
void |
|
523 |
ipcl_conn_destroy(conn_t *connp) |
|
524 |
{ |
|
525 |
mblk_t *mp; |
|
526 |
||
527 |
ASSERT(!MUTEX_HELD(&connp->conn_lock)); |
|
528 |
ASSERT(connp->conn_ref == 0); |
|
529 |
ASSERT(connp->conn_ire_cache == NULL); |
|
530 |
||
531 |
ipcl_globalhash_remove(connp); |
|
532 |
||
533 |
cv_destroy(&connp->conn_cv); |
|
534 |
if (connp->conn_flags & IPCL_TCPCONN) { |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
535 |
tcp_t *tcp = connp->conn_tcp; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
536 |
|
0 | 537 |
mutex_destroy(&connp->conn_lock); |
538 |
ASSERT(connp->conn_tcp != NULL); |
|
539 |
tcp_free(tcp); |
|
540 |
mp = tcp->tcp_timercache; |
|
541 |
||
542 |
if (tcp->tcp_sack_info != NULL) { |
|
543 |
bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); |
|
544 |
kmem_cache_free(tcp_sack_info_cache, |
|
545 |
tcp->tcp_sack_info); |
|
546 |
} |
|
547 |
if (tcp->tcp_iphc != NULL) { |
|
548 |
if (tcp->tcp_hdr_grown) { |
|
549 |
kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); |
|
550 |
} else { |
|
551 |
bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); |
|
552 |
kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); |
|
553 |
} |
|
554 |
tcp->tcp_iphc_len = 0; |
|
555 |
} |
|
556 |
ASSERT(tcp->tcp_iphc_len == 0); |
|
557 |
||
558 |
if (connp->conn_latch != NULL) |
|
559 |
IPLATCH_REFRELE(connp->conn_latch); |
|
560 |
if (connp->conn_policy != NULL) |
|
561 |
IPPH_REFRELE(connp->conn_policy); |
|
562 |
bzero(connp, sizeof (itc_t)); |
|
563 |
||
564 |
tcp->tcp_timercache = mp; |
|
565 |
connp->conn_tcp = tcp; |
|
566 |
connp->conn_flags = IPCL_TCPCONN; |
|
567 |
connp->conn_ulp = IPPROTO_TCP; |
|
568 |
tcp->tcp_connp = connp; |
|
569 |
kmem_cache_free(ipcl_tcpconn_cache, connp); |
|
570 |
} else if (connp->conn_flags & IPCL_SCTPCONN) { |
|
571 |
sctp_free(connp); |
|
572 |
} else { |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
573 |
ASSERT(connp->conn_udp == NULL); |
0 | 574 |
mutex_destroy(&connp->conn_lock); |
575 |
kmem_cache_free(ipcl_conn_cache, connp); |
|
576 |
} |
|
577 |
} |
|
578 |
||
579 |
/* |
|
580 |
* Running in cluster mode - deregister listener information |
|
581 |
*/ |
|
582 |
||
583 |
static void |
|
584 |
ipcl_conn_unlisten(conn_t *connp) |
|
585 |
{ |
|
586 |
ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); |
|
587 |
ASSERT(connp->conn_lport != 0); |
|
588 |
||
589 |
if (cl_inet_unlisten != NULL) { |
|
590 |
sa_family_t addr_family; |
|
591 |
uint8_t *laddrp; |
|
592 |
||
593 |
if (connp->conn_pkt_isv6) { |
|
594 |
addr_family = AF_INET6; |
|
595 |
laddrp = (uint8_t *)&connp->conn_bound_source_v6; |
|
596 |
} else { |
|
597 |
addr_family = AF_INET; |
|
598 |
laddrp = (uint8_t *)&connp->conn_bound_source; |
|
599 |
} |
|
600 |
(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, |
|
601 |
connp->conn_lport); |
|
602 |
} |
|
603 |
connp->conn_flags &= ~IPCL_CL_LISTENER; |
|
604 |
} |
|
605 |
||
606 |
/*
 * Unlink 'connp' from the fanout bucket recorded in conn_fanout (nothing
 * is done if that is NULL), deregister it as a cluster listener if it is
 * flagged as one, and drop one conn reference.  The bucket lock is taken
 * here; conn_lock must not be held by the caller.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to), so that when debugging we can still
 * see which hash table this connection was in.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
									\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_prev == NULL) {			\
			connfp->connf_head = (connp)->conn_next;	\
		} else {						\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		}							\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
|
636 |
||
637 |
void |
|
638 |
ipcl_hash_remove(conn_t *connp) |
|
639 |
{ |
|
640 |
IPCL_HASH_REMOVE(connp); |
|
641 |
} |
|
642 |
||
643 |
/* |
|
644 |
* The whole purpose of this function is allow removal of |
|
645 |
* a conn_t from the connected hash for timewait reclaim. |
|
646 |
* This is essentially a TW reclaim fastpath where timewait |
|
647 |
* collector checks under fanout lock (so no one else can |
|
648 |
* get access to the conn_t) that refcnt is 2 i.e. one for |
|
649 |
* TCP and one for the classifier hash list. If ref count |
|
650 |
* is indeed 2, we can just remove the conn under lock and |
|
651 |
* avoid cleaning up the conn under squeue. This gives us |
|
652 |
* improved performance. |
|
653 |
*/ |
|
654 |
void |
|
655 |
ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) |
|
656 |
{ |
|
657 |
ASSERT(MUTEX_HELD(&connfp->connf_lock)); |
|
658 |
ASSERT(MUTEX_HELD(&connp->conn_lock)); |
|
659 |
ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); |
|
660 |
||
661 |
if ((connp)->conn_next != NULL) { |
|
662 |
(connp)->conn_next->conn_prev = |
|
663 |
(connp)->conn_prev; |
|
664 |
} |
|
665 |
if ((connp)->conn_prev != NULL) { |
|
666 |
(connp)->conn_prev->conn_next = |
|
667 |
(connp)->conn_next; |
|
668 |
} else { |
|
669 |
connfp->connf_head = (connp)->conn_next; |
|
670 |
} |
|
671 |
(connp)->conn_fanout = NULL; |
|
672 |
(connp)->conn_next = NULL; |
|
673 |
(connp)->conn_prev = NULL; |
|
674 |
(connp)->conn_flags |= IPCL_REMOVED; |
|
675 |
ASSERT((connp)->conn_ref == 2); |
|
676 |
(connp)->conn_ref--; |
|
677 |
} |
|
678 |
||
679 |
/*
 * Push 'connp' onto the head of bucket 'connfp', mark it IPCL_CONNECTED
 * (clearing IPCL_REMOVED) and take a conn reference for the list.  This
 * macro does not take connf_lock itself.  The conn must not currently be
 * linked on any fanout list.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
									\
	if ((connfp)->connf_head != NULL) {				\
		(connp)->conn_next = (connfp)->connf_head;		\
		(connfp)->connf_head->conn_prev = (connp);		\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags =						\
	    ((connp)->conn_flags & ~IPCL_REMOVED) | IPCL_CONNECTED;	\
	CONN_INC_REF(connp);						\
}
|
693 |
||
694 |
/*
 * Move 'connp' to the head of connected bucket 'connfp': first unlink it
 * from any bucket it is on now (IPCL_HASH_REMOVE), then insert it under
 * the new bucket's lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
									\
	IPCL_HASH_REMOVE((connp));					\
									\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
|
702 |
||
703 |
/*
 * Move 'connp' into bind bucket 'connfp'.  The conn is first unlinked from
 * any bucket it is on now.  It is then inserted immediately ahead of the
 * first entry whose conn_srcv6 satisfies _IPCL_V4_MATCH_ANY(), i.e. after
 * every leading entry that does not; if no entry matches it goes on the
 * tail.  The conn is marked IPCL_BOUND (clearing IPCL_REMOVED) and a conn
 * reference is taken for the list.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	/* Find the insertion point: pconnp trails nconnp. */		\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
|
731 |
||
732 |
/*
 * Move 'connp' into bucket 'connfp'.  The conn is first unlinked from any
 * bucket it is on now.  If its conn_srcv6 is a v4-mapped address it is
 * inserted immediately ahead of the first entry in the same zone whose
 * conn_srcv6 is the unspecified address; otherwise, or if there is no such
 * entry, it is appended to the tail.  The conn is marked IPCL_BOUND
 * (clearing IPCL_REMOVED) and a conn reference is taken for the list.
 *
 * 'list' always points at the link that should end up pointing at connp
 * (the bucket head or some entry's conn_next); 'prev' is the entry that
 * owns that link, or NULL for the head.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			/* NOTE(review): prev already trails next, */	\
			/* so this looks like a no-op - confirm. */	\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
|
763 |
||
764 |
void |
|
765 |
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) |
|
766 |
{ |
|
767 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
768 |
} |
|
769 |
||
770 |
void |
|
771 |
ipcl_proto_insert(conn_t *connp, uint8_t protocol) |
|
772 |
{ |
|
773 |
connf_t *connfp; |
|
774 |
||
775 |
ASSERT(connp != NULL); |
|
776 |
||
777 |
connp->conn_ulp = protocol; |
|
778 |
||
779 |
/* Insert it in the protocol hash */ |
|
780 |
connfp = &ipcl_proto_fanout[protocol]; |
|
781 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
782 |
} |
|
783 |
||
784 |
void |
|
785 |
ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) |
|
786 |
{ |
|
787 |
connf_t *connfp; |
|
788 |
||
789 |
ASSERT(connp != NULL); |
|
790 |
||
791 |
connp->conn_ulp = protocol; |
|
792 |
||
793 |
/* Insert it in the Bind Hash */ |
|
794 |
connfp = &ipcl_proto_fanout_v6[protocol]; |
|
795 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
796 |
} |
|
797 |
||
798 |
/* |
|
799 |
* This function is used only for inserting SCTP raw socket now. |
|
800 |
* This may change later. |
|
801 |
* |
|
802 |
* Note that only one raw socket can be bound to a port. The param |
|
803 |
* lport is in network byte order. |
|
804 |
*/ |
|
805 |
static int |
|
806 |
ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) |
|
807 |
{ |
|
808 |
connf_t *connfp; |
|
809 |
conn_t *oconnp; |
|
810 |
||
811 |
connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; |
|
812 |
||
813 |
/* Check for existing raw socket already bound to the port. */ |
|
814 |
mutex_enter(&connfp->connf_lock); |
|
815 |
for (oconnp = connfp->connf_head; oconnp != NULL; |
|
409
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
816 |
oconnp = oconnp->conn_next) { |
0 | 817 |
if (oconnp->conn_lport == lport && |
818 |
oconnp->conn_zoneid == connp->conn_zoneid && |
|
819 |
oconnp->conn_af_isv6 == connp->conn_af_isv6 && |
|
820 |
((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || |
|
821 |
IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || |
|
822 |
IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || |
|
823 |
IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || |
|
824 |
IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, |
|
825 |
&connp->conn_srcv6))) { |
|
826 |
break; |
|
827 |
} |
|
828 |
} |
|
829 |
mutex_exit(&connfp->connf_lock); |
|
830 |
if (oconnp != NULL) |
|
831 |
return (EADDRNOTAVAIL); |
|
832 |
||
833 |
if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || |
|
834 |
IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { |
|
835 |
if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || |
|
836 |
IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { |
|
837 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
838 |
} else { |
|
839 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
840 |
} |
|
841 |
} else { |
|
842 |
IPCL_HASH_INSERT_CONNECTED(connfp, connp); |
|
843 |
} |
|
844 |
return (0); |
|
845 |
} |
|
846 |
||
847 |
/* |
|
848 |
* (v4, v6) bind hash insertion routines |
|
849 |
*/ |
|
850 |
int |
|
851 |
ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) |
|
852 |
{ |
|
853 |
connf_t *connfp; |
|
854 |
#ifdef IPCL_DEBUG |
|
855 |
char buf[INET_NTOA_BUFSIZE]; |
|
856 |
#endif |
|
857 |
int ret = 0; |
|
858 |
||
859 |
ASSERT(connp); |
|
860 |
||
861 |
IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " |
|
862 |
"port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); |
|
863 |
||
864 |
connp->conn_ulp = protocol; |
|
865 |
IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); |
|
866 |
connp->conn_lport = lport; |
|
867 |
||
868 |
switch (protocol) { |
|
869 |
case IPPROTO_UDP: |
|
870 |
default: |
|
871 |
if (protocol == IPPROTO_UDP) { |
|
872 |
IPCL_DEBUG_LVL(64, |
|
873 |
("ipcl_bind_insert: connp %p - udp\n", |
|
874 |
(void *)connp)); |
|
875 |
connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; |
|
876 |
} else { |
|
877 |
IPCL_DEBUG_LVL(64, |
|
878 |
("ipcl_bind_insert: connp %p - protocol\n", |
|
879 |
(void *)connp)); |
|
880 |
connfp = &ipcl_proto_fanout[protocol]; |
|
881 |
} |
|
882 |
||
883 |
if (connp->conn_rem != INADDR_ANY) { |
|
884 |
IPCL_HASH_INSERT_CONNECTED(connfp, connp); |
|
885 |
} else if (connp->conn_src != INADDR_ANY) { |
|
886 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
887 |
} else { |
|
888 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
889 |
} |
|
890 |
break; |
|
891 |
||
892 |
case IPPROTO_TCP: |
|
893 |
||
894 |
/* Insert it in the Bind Hash */ |
|
895 |
connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; |
|
896 |
if (connp->conn_src != INADDR_ANY) { |
|
897 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
898 |
} else { |
|
899 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
900 |
} |
|
901 |
if (cl_inet_listen != NULL) { |
|
902 |
ASSERT(!connp->conn_pkt_isv6); |
|
903 |
connp->conn_flags |= IPCL_CL_LISTENER; |
|
904 |
(*cl_inet_listen)(IPPROTO_TCP, AF_INET, |
|
905 |
(uint8_t *)&connp->conn_bound_source, lport); |
|
906 |
} |
|
907 |
break; |
|
908 |
||
909 |
case IPPROTO_SCTP: |
|
910 |
ret = ipcl_sctp_hash_insert(connp, lport); |
|
911 |
break; |
|
912 |
} |
|
913 |
||
914 |
return (ret); |
|
915 |
} |
|
916 |
||
917 |
int |
|
918 |
ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, |
|
919 |
uint16_t lport) |
|
920 |
{ |
|
921 |
connf_t *connfp; |
|
922 |
int ret = 0; |
|
923 |
||
924 |
ASSERT(connp); |
|
925 |
||
926 |
connp->conn_ulp = protocol; |
|
927 |
connp->conn_srcv6 = *src; |
|
928 |
connp->conn_lport = lport; |
|
929 |
||
930 |
switch (protocol) { |
|
931 |
case IPPROTO_UDP: |
|
932 |
default: |
|
933 |
if (protocol == IPPROTO_UDP) { |
|
934 |
IPCL_DEBUG_LVL(128, |
|
935 |
("ipcl_bind_insert_v6: connp %p - udp\n", |
|
936 |
(void *)connp)); |
|
937 |
connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; |
|
938 |
} else { |
|
939 |
IPCL_DEBUG_LVL(128, |
|
940 |
("ipcl_bind_insert_v6: connp %p - protocol\n", |
|
941 |
(void *)connp)); |
|
942 |
connfp = &ipcl_proto_fanout_v6[protocol]; |
|
943 |
} |
|
944 |
||
945 |
if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { |
|
946 |
IPCL_HASH_INSERT_CONNECTED(connfp, connp); |
|
947 |
} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { |
|
948 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
949 |
} else { |
|
950 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
951 |
} |
|
952 |
break; |
|
953 |
||
954 |
case IPPROTO_TCP: |
|
955 |
/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */ |
|
956 |
||
957 |
/* Insert it in the Bind Hash */ |
|
958 |
connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; |
|
959 |
if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { |
|
960 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
961 |
} else { |
|
962 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
963 |
} |
|
964 |
if (cl_inet_listen != NULL) { |
|
965 |
sa_family_t addr_family; |
|
966 |
uint8_t *laddrp; |
|
967 |
||
968 |
if (connp->conn_pkt_isv6) { |
|
969 |
addr_family = AF_INET6; |
|
970 |
laddrp = |
|
971 |
(uint8_t *)&connp->conn_bound_source_v6; |
|
972 |
} else { |
|
973 |
addr_family = AF_INET; |
|
974 |
laddrp = (uint8_t *)&connp->conn_bound_source; |
|
975 |
} |
|
976 |
connp->conn_flags |= IPCL_CL_LISTENER; |
|
977 |
(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, |
|
978 |
lport); |
|
979 |
} |
|
980 |
break; |
|
981 |
||
982 |
case IPPROTO_SCTP: |
|
983 |
ret = ipcl_sctp_hash_insert(connp, lport); |
|
984 |
break; |
|
985 |
} |
|
986 |
||
987 |
return (ret); |
|
988 |
} |
|
989 |
||
990 |
/* |
|
991 |
* ipcl_conn_hash insertion routines. |
|
992 |
*/ |
|
993 |
int |
|
994 |
ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, |
|
995 |
ipaddr_t rem, uint32_t ports) |
|
996 |
{ |
|
997 |
connf_t *connfp; |
|
998 |
uint16_t *up; |
|
999 |
conn_t *tconnp; |
|
1000 |
#ifdef IPCL_DEBUG |
|
1001 |
char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; |
|
1002 |
#endif |
|
1003 |
in_port_t lport; |
|
1004 |
int ret = 0; |
|
1005 |
||
1006 |
IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " |
|
1007 |
"dst = %s, ports = %x, protocol = %x", (void *)connp, |
|
1008 |
inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), |
|
1009 |
ports, protocol)); |
|
1010 |
||
1011 |
switch (protocol) { |
|
1012 |
case IPPROTO_TCP: |
|
1013 |
if (!(connp->conn_flags & IPCL_EAGER)) { |
|
1014 |
/* |
|
1015 |
* for a eager connection, i.e connections which |
|
1016 |
* have just been created, the initialization is |
|
1017 |
* already done in ip at conn_creation time, so |
|
1018 |
* we can skip the checks here. |
|
1019 |
*/ |
|
1020 |
IPCL_CONN_INIT(connp, protocol, src, rem, ports); |
|
1021 |
} |
|
1022 |
connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem, |
|
1023 |
connp->conn_ports)]; |
|
1024 |
mutex_enter(&connfp->connf_lock); |
|
1025 |
for (tconnp = connfp->connf_head; tconnp != NULL; |
|
1026 |
tconnp = tconnp->conn_next) { |
|
1027 |
if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, |
|
1028 |
connp->conn_rem, connp->conn_src, |
|
1029 |
connp->conn_ports)) { |
|
1030 |
||
1031 |
/* Already have a conn. bail out */ |
|
1032 |
mutex_exit(&connfp->connf_lock); |
|
1033 |
return (EADDRINUSE); |
|
1034 |
} |
|
1035 |
} |
|
1036 |
if (connp->conn_fanout != NULL) { |
|
1037 |
/* |
|
1038 |
* Probably a XTI/TLI application trying to do a |
|
1039 |
* rebind. Let it happen. |
|
1040 |
*/ |
|
1041 |
mutex_exit(&connfp->connf_lock); |
|
1042 |
IPCL_HASH_REMOVE(connp); |
|
1043 |
mutex_enter(&connfp->connf_lock); |
|
1044 |
} |
|
1045 |
IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); |
|
1046 |
mutex_exit(&connfp->connf_lock); |
|
1047 |
break; |
|
1048 |
||
1049 |
case IPPROTO_SCTP: |
|
409
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1050 |
/* |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1051 |
* The raw socket may have already been bound, remove it |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1052 |
* from the hash first. |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1053 |
*/ |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1054 |
IPCL_HASH_REMOVE(connp); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1055 |
lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); |
0 | 1056 |
ret = ipcl_sctp_hash_insert(connp, lport); |
1057 |
break; |
|
1058 |
||
1059 |
case IPPROTO_UDP: |
|
1060 |
default: |
|
1061 |
up = (uint16_t *)&ports; |
|
1062 |
IPCL_CONN_INIT(connp, protocol, src, rem, ports); |
|
1063 |
if (protocol == IPPROTO_UDP) { |
|
1064 |
connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; |
|
1065 |
} else { |
|
1066 |
connfp = &ipcl_proto_fanout[protocol]; |
|
1067 |
} |
|
1068 |
||
1069 |
if (connp->conn_rem != INADDR_ANY) { |
|
1070 |
IPCL_HASH_INSERT_CONNECTED(connfp, connp); |
|
1071 |
} else if (connp->conn_src != INADDR_ANY) { |
|
1072 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
1073 |
} else { |
|
1074 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
1075 |
} |
|
1076 |
break; |
|
1077 |
} |
|
1078 |
||
1079 |
return (ret); |
|
1080 |
} |
|
1081 |
||
1082 |
int |
|
1083 |
ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, |
|
1084 |
const in6_addr_t *rem, uint32_t ports, uint_t ifindex) |
|
1085 |
{ |
|
1086 |
connf_t *connfp; |
|
1087 |
uint16_t *up; |
|
1088 |
conn_t *tconnp; |
|
1089 |
in_port_t lport; |
|
1090 |
int ret = 0; |
|
1091 |
||
1092 |
switch (protocol) { |
|
1093 |
case IPPROTO_TCP: |
|
1094 |
/* Just need to insert a conn struct */ |
|
1095 |
if (!(connp->conn_flags & IPCL_EAGER)) { |
|
1096 |
IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); |
|
1097 |
} |
|
1098 |
connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6, |
|
1099 |
connp->conn_ports)]; |
|
1100 |
mutex_enter(&connfp->connf_lock); |
|
1101 |
for (tconnp = connfp->connf_head; tconnp != NULL; |
|
1102 |
tconnp = tconnp->conn_next) { |
|
1103 |
if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, |
|
1104 |
connp->conn_remv6, connp->conn_srcv6, |
|
1105 |
connp->conn_ports) && |
|
1106 |
(tconnp->conn_tcp->tcp_bound_if == 0 || |
|
1107 |
tconnp->conn_tcp->tcp_bound_if == ifindex)) { |
|
1108 |
/* Already have a conn. bail out */ |
|
1109 |
mutex_exit(&connfp->connf_lock); |
|
1110 |
return (EADDRINUSE); |
|
1111 |
} |
|
1112 |
} |
|
1113 |
if (connp->conn_fanout != NULL) { |
|
1114 |
/* |
|
1115 |
* Probably a XTI/TLI application trying to do a |
|
1116 |
* rebind. Let it happen. |
|
1117 |
*/ |
|
1118 |
mutex_exit(&connfp->connf_lock); |
|
1119 |
IPCL_HASH_REMOVE(connp); |
|
1120 |
mutex_enter(&connfp->connf_lock); |
|
1121 |
} |
|
1122 |
IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); |
|
1123 |
mutex_exit(&connfp->connf_lock); |
|
1124 |
break; |
|
1125 |
||
1126 |
case IPPROTO_SCTP: |
|
409
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1127 |
IPCL_HASH_REMOVE(connp); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1128 |
lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); |
0 | 1129 |
ret = ipcl_sctp_hash_insert(connp, lport); |
1130 |
break; |
|
1131 |
||
1132 |
case IPPROTO_UDP: |
|
1133 |
default: |
|
1134 |
up = (uint16_t *)&ports; |
|
1135 |
IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); |
|
1136 |
if (protocol == IPPROTO_UDP) { |
|
1137 |
connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; |
|
1138 |
} else { |
|
1139 |
connfp = &ipcl_proto_fanout_v6[protocol]; |
|
1140 |
} |
|
1141 |
||
1142 |
if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { |
|
1143 |
IPCL_HASH_INSERT_CONNECTED(connfp, connp); |
|
1144 |
} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { |
|
1145 |
IPCL_HASH_INSERT_BOUND(connfp, connp); |
|
1146 |
} else { |
|
1147 |
IPCL_HASH_INSERT_WILDCARD(connfp, connp); |
|
1148 |
} |
|
1149 |
break; |
|
1150 |
} |
|
1151 |
||
1152 |
return (ret); |
|
1153 |
} |
|
1154 |
||
1155 |
/* |
|
1156 |
* v4 packet classifying function. looks up the fanout table to |
|
1157 |
* find the conn, the packet belongs to. returns the conn with |
|
1158 |
* the reference held, null otherwise. |
|
1159 |
*/ |
|
1160 |
conn_t * |
|
1161 |
ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) |
|
1162 |
{ |
|
1163 |
ipha_t *ipha; |
|
1164 |
connf_t *connfp, *bind_connfp; |
|
1165 |
uint16_t lport; |
|
1166 |
uint16_t fport; |
|
1167 |
uint32_t ports; |
|
1168 |
conn_t *connp; |
|
1169 |
uint16_t *up; |
|
1170 |
||
1171 |
ipha = (ipha_t *)mp->b_rptr; |
|
1172 |
up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); |
|
1173 |
||
1174 |
switch (protocol) { |
|
1175 |
case IPPROTO_TCP: |
|
1176 |
ports = *(uint32_t *)up; |
|
1177 |
connfp = |
|
1178 |
&ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; |
|
1179 |
mutex_enter(&connfp->connf_lock); |
|
1180 |
for (connp = connfp->connf_head; connp != NULL; |
|
1181 |
connp = connp->conn_next) { |
|
1182 |
if (IPCL_CONN_MATCH(connp, protocol, |
|
1183 |
ipha->ipha_src, ipha->ipha_dst, ports)) |
|
1184 |
break; |
|
1185 |
} |
|
1186 |
||
1187 |
if (connp != NULL) { |
|
1188 |
CONN_INC_REF(connp); |
|
1189 |
mutex_exit(&connfp->connf_lock); |
|
1190 |
return (connp); |
|
1191 |
} |
|
1192 |
||
1193 |
mutex_exit(&connfp->connf_lock); |
|
1194 |
||
1195 |
lport = up[1]; |
|
1196 |
bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; |
|
1197 |
mutex_enter(&bind_connfp->connf_lock); |
|
1198 |
for (connp = bind_connfp->connf_head; connp != NULL; |
|
1199 |
connp = connp->conn_next) { |
|
1200 |
if (IPCL_BIND_MATCH(connp, protocol, |
|
1201 |
ipha->ipha_dst, lport) && |
|
1202 |
connp->conn_zoneid == zoneid) |
|
1203 |
break; |
|
1204 |
} |
|
1205 |
||
1206 |
if (connp != NULL) { |
|
1207 |
/* Have a listner at least */ |
|
1208 |
CONN_INC_REF(connp); |
|
1209 |
mutex_exit(&bind_connfp->connf_lock); |
|
1210 |
return (connp); |
|
1211 |
} |
|
1212 |
||
1213 |
mutex_exit(&bind_connfp->connf_lock); |
|
1214 |
||
1215 |
IPCL_DEBUG_LVL(512, |
|
1216 |
("ipcl_classify: couldn't classify mp = %p\n", |
|
1217 |
(void *)mp)); |
|
1218 |
break; |
|
1219 |
||
1220 |
case IPPROTO_UDP: |
|
1221 |
lport = up[1]; |
|
1222 |
fport = up[0]; |
|
1223 |
IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); |
|
1224 |
connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; |
|
1225 |
mutex_enter(&connfp->connf_lock); |
|
1226 |
for (connp = connfp->connf_head; connp != NULL; |
|
1227 |
connp = connp->conn_next) { |
|
1228 |
if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, |
|
1229 |
fport, ipha->ipha_src) && |
|
1230 |
connp->conn_zoneid == zoneid) |
|
1231 |
break; |
|
1232 |
} |
|
1233 |
||
1234 |
if (connp != NULL) { |
|
1235 |
CONN_INC_REF(connp); |
|
1236 |
mutex_exit(&connfp->connf_lock); |
|
1237 |
return (connp); |
|
1238 |
} |
|
1239 |
||
1240 |
/* |
|
1241 |
* We shouldn't come here for multicast/broadcast packets |
|
1242 |
*/ |
|
1243 |
mutex_exit(&connfp->connf_lock); |
|
1244 |
IPCL_DEBUG_LVL(512, |
|
1245 |
("ipcl_classify: cant find udp conn_t for ports : %x %x", |
|
1246 |
lport, fport)); |
|
1247 |
break; |
|
1248 |
} |
|
1249 |
||
1250 |
return (NULL); |
|
1251 |
} |
|
1252 |
||
1253 |
conn_t * |
|
1254 |
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) |
|
1255 |
{ |
|
1256 |
ip6_t *ip6h; |
|
1257 |
connf_t *connfp, *bind_connfp; |
|
1258 |
uint16_t lport; |
|
1259 |
uint16_t fport; |
|
1260 |
tcph_t *tcph; |
|
1261 |
uint32_t ports; |
|
1262 |
conn_t *connp; |
|
1263 |
uint16_t *up; |
|
1264 |
||
1265 |
||
1266 |
ip6h = (ip6_t *)mp->b_rptr; |
|
1267 |
||
1268 |
switch (protocol) { |
|
1269 |
case IPPROTO_TCP: |
|
1270 |
tcph = (tcph_t *)&mp->b_rptr[hdr_len]; |
|
1271 |
up = (uint16_t *)tcph->th_lport; |
|
1272 |
ports = *(uint32_t *)up; |
|
1273 |
||
1274 |
connfp = |
|
1275 |
&ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; |
|
1276 |
mutex_enter(&connfp->connf_lock); |
|
1277 |
for (connp = connfp->connf_head; connp != NULL; |
|
1278 |
connp = connp->conn_next) { |
|
1279 |
if (IPCL_CONN_MATCH_V6(connp, protocol, |
|
1280 |
ip6h->ip6_src, ip6h->ip6_dst, ports)) |
|
1281 |
break; |
|
1282 |
} |
|
1283 |
||
1284 |
if (connp != NULL) { |
|
1285 |
CONN_INC_REF(connp); |
|
1286 |
mutex_exit(&connfp->connf_lock); |
|
1287 |
return (connp); |
|
1288 |
} |
|
1289 |
||
1290 |
mutex_exit(&connfp->connf_lock); |
|
1291 |
||
1292 |
lport = up[1]; |
|
1293 |
bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; |
|
1294 |
mutex_enter(&bind_connfp->connf_lock); |
|
1295 |
for (connp = bind_connfp->connf_head; connp != NULL; |
|
1296 |
connp = connp->conn_next) { |
|
1297 |
if (IPCL_BIND_MATCH_V6(connp, protocol, |
|
1298 |
ip6h->ip6_dst, lport) && |
|
1299 |
connp->conn_zoneid == zoneid) |
|
1300 |
break; |
|
1301 |
} |
|
1302 |
||
1303 |
if (connp != NULL) { |
|
1304 |
/* Have a listner at least */ |
|
1305 |
CONN_INC_REF(connp); |
|
1306 |
mutex_exit(&bind_connfp->connf_lock); |
|
1307 |
IPCL_DEBUG_LVL(512, |
|
1308 |
("ipcl_classify_v6: found listner " |
|
1309 |
"connp = %p\n", (void *)connp)); |
|
1310 |
||
1311 |
return (connp); |
|
1312 |
} |
|
1313 |
||
1314 |
mutex_exit(&bind_connfp->connf_lock); |
|
1315 |
||
1316 |
IPCL_DEBUG_LVL(512, |
|
1317 |
("ipcl_classify_v6: couldn't classify mp = %p\n", |
|
1318 |
(void *)mp)); |
|
1319 |
break; |
|
1320 |
||
1321 |
case IPPROTO_UDP: |
|
1322 |
up = (uint16_t *)&mp->b_rptr[hdr_len]; |
|
1323 |
lport = up[1]; |
|
1324 |
fport = up[0]; |
|
1325 |
IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, |
|
1326 |
fport)); |
|
1327 |
connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; |
|
1328 |
mutex_enter(&connfp->connf_lock); |
|
1329 |
for (connp = connfp->connf_head; connp != NULL; |
|
1330 |
connp = connp->conn_next) { |
|
1331 |
if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, |
|
1332 |
fport, ip6h->ip6_src) && |
|
1333 |
connp->conn_zoneid == zoneid) |
|
1334 |
break; |
|
1335 |
} |
|
1336 |
||
1337 |
if (connp != NULL) { |
|
1338 |
CONN_INC_REF(connp); |
|
1339 |
mutex_exit(&connfp->connf_lock); |
|
1340 |
return (connp); |
|
1341 |
} |
|
1342 |
||
1343 |
/* |
|
1344 |
* We shouldn't come here for multicast/broadcast packets |
|
1345 |
*/ |
|
1346 |
mutex_exit(&connfp->connf_lock); |
|
1347 |
IPCL_DEBUG_LVL(512, |
|
1348 |
("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", |
|
1349 |
lport, fport)); |
|
1350 |
break; |
|
1351 |
} |
|
1352 |
||
1353 |
||
1354 |
return (NULL); |
|
1355 |
} |
|
1356 |
||
1357 |
/* |
|
1358 |
* wrapper around ipcl_classify_(v4,v6) routines. |
|
1359 |
*/ |
|
1360 |
conn_t * |
|
1361 |
ipcl_classify(mblk_t *mp, zoneid_t zoneid) |
|
1362 |
{ |
|
1363 |
uint16_t hdr_len; |
|
1364 |
ipha_t *ipha; |
|
1365 |
uint8_t *nexthdrp; |
|
1366 |
||
1367 |
if (MBLKL(mp) < sizeof (ipha_t)) |
|
1368 |
return (NULL); |
|
1369 |
||
1370 |
switch (IPH_HDR_VERSION(mp->b_rptr)) { |
|
1371 |
case IPV4_VERSION: |
|
1372 |
ipha = (ipha_t *)mp->b_rptr; |
|
1373 |
hdr_len = IPH_HDR_LENGTH(ipha); |
|
1374 |
return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, |
|
1375 |
zoneid)); |
|
1376 |
case IPV6_VERSION: |
|
1377 |
if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, |
|
1378 |
&hdr_len, &nexthdrp)) |
|
1379 |
return (NULL); |
|
1380 |
||
1381 |
return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); |
|
1382 |
} |
|
1383 |
||
1384 |
return (NULL); |
|
1385 |
} |
|
1386 |
||
1387 |
conn_t * |
|
1388 |
ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports, |
|
1389 |
ipha_t *hdr) |
|
1390 |
{ |
|
1391 |
struct connf_s *connfp; |
|
1392 |
conn_t *connp; |
|
1393 |
in_port_t lport; |
|
1394 |
int af; |
|
1395 |
||
1396 |
lport = ((uint16_t *)&ports)[1]; |
|
1397 |
af = IPH_HDR_VERSION(hdr); |
|
1398 |
connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; |
|
1399 |
||
1400 |
mutex_enter(&connfp->connf_lock); |
|
1401 |
for (connp = connfp->connf_head; connp != NULL; |
|
1402 |
connp = connp->conn_next) { |
|
1403 |
/* We don't allow v4 fallback for v6 raw socket. */ |
|
1404 |
if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : |
|
1405 |
IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { |
|
1406 |
continue; |
|
1407 |
} |
|
1408 |
if (connp->conn_fully_bound) { |
|
1409 |
if (af == IPV4_VERSION) { |
|
1410 |
if (IPCL_CONN_MATCH(connp, protocol, |
|
1411 |
hdr->ipha_src, hdr->ipha_dst, ports)) { |
|
1412 |
break; |
|
1413 |
} |
|
1414 |
} else { |
|
1415 |
if (IPCL_CONN_MATCH_V6(connp, protocol, |
|
1416 |
((ip6_t *)hdr)->ip6_src, |
|
1417 |
((ip6_t *)hdr)->ip6_dst, ports)) { |
|
1418 |
break; |
|
1419 |
} |
|
1420 |
} |
|
1421 |
} else { |
|
1422 |
if (af == IPV4_VERSION) { |
|
1423 |
if (IPCL_BIND_MATCH(connp, protocol, |
|
1424 |
hdr->ipha_dst, lport)) { |
|
1425 |
break; |
|
1426 |
} |
|
1427 |
} else { |
|
1428 |
if (IPCL_BIND_MATCH_V6(connp, protocol, |
|
1429 |
((ip6_t *)hdr)->ip6_dst, lport)) { |
|
1430 |
break; |
|
1431 |
} |
|
1432 |
} |
|
1433 |
} |
|
1434 |
} |
|
409
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1435 |
|
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1436 |
if (connp != NULL) |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1437 |
goto found; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1438 |
mutex_exit(&connfp->connf_lock); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1439 |
|
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1440 |
/* Try to look for a wildcard match. */ |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1441 |
connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)]; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1442 |
mutex_enter(&connfp->connf_lock); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1443 |
for (connp = connfp->connf_head; connp != NULL; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1444 |
connp = connp->conn_next) { |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1445 |
/* We don't allow v4 fallback for v6 raw socket. */ |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1446 |
if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1447 |
IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1448 |
continue; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1449 |
} |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1450 |
if (af == IPV4_VERSION) { |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1451 |
if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1452 |
break; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1453 |
} else { |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1454 |
if (IPCL_RAW_MATCH_V6(connp, protocol, |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1455 |
((ip6_t *)hdr)->ip6_dst)) { |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1456 |
break; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1457 |
} |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1458 |
} |
0 | 1459 |
} |
409
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1460 |
|
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1461 |
if (connp != NULL) |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1462 |
goto found; |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1463 |
|
0 | 1464 |
mutex_exit(&connfp->connf_lock); |
1465 |
return (NULL); |
|
409
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1466 |
|
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1467 |
found: |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1468 |
ASSERT(connp != NULL); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1469 |
CONN_INC_REF(connp); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1470 |
mutex_exit(&connfp->connf_lock); |
22012dc8ea5b
6294727 SCTP raw socket bind() failed for ports which are mutliples of 256
kcpoon
parents:
153
diff
changeset
|
1471 |
return (connp); |
0 | 1472 |
} |
1473 |
||
1474 |
/* ARGSUSED */ |
|
1475 |
static int |
|
1476 |
ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) |
|
1477 |
{ |
|
1478 |
itc_t *itc = (itc_t *)buf; |
|
1479 |
conn_t *connp = &itc->itc_conn; |
|
1480 |
tcp_t *tcp = &itc->itc_tcp; |
|
1481 |
bzero(itc, sizeof (itc_t)); |
|
1482 |
tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); |
|
1483 |
connp->conn_tcp = tcp; |
|
1484 |
connp->conn_flags = IPCL_TCPCONN; |
|
1485 |
connp->conn_ulp = IPPROTO_TCP; |
|
1486 |
tcp->tcp_connp = connp; |
|
1487 |
return (0); |
|
1488 |
} |
|
1489 |
||
1490 |
/* ARGSUSED */ |
|
1491 |
static void |
|
1492 |
ipcl_tcpconn_destructor(void *buf, void *cdrarg) |
|
1493 |
{ |
|
1494 |
tcp_timermp_free(((conn_t *)buf)->conn_tcp); |
|
1495 |
} |
|
1496 |
||
1497 |
/* |
|
1498 |
* All conns are inserted in a global multi-list for the benefit of |
|
1499 |
* walkers. The walk is guaranteed to walk all open conns at the time |
|
1500 |
* of the start of the walk exactly once. This property is needed to |
|
1501 |
* achieve some cleanups during unplumb of interfaces. This is achieved |
|
1502 |
* as follows. |
|
1503 |
* |
|
1504 |
* ipcl_conn_create and ipcl_conn_destroy are the only functions that |
|
1505 |
* call the insert and delete functions below at creation and deletion |
|
1506 |
* time respectively. The conn never moves or changes its position in this |
|
1507 |
* multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt |
|
1508 |
* won't increase due to walkers, once the conn deletion has started. Note |
|
1509 |
* that we can't remove the conn from the global list and then wait for |
|
1510 |
* the refcnt to drop to zero, since walkers would then see a truncated |
|
1511 |
* list. CONN_INCIPIENT ensures that walkers don't start looking at |
|
1512 |
* conns until ip_open is ready to make them globally visible. |
|
1513 |
* The global round robin multi-list locks are held only to get the |
|
1514 |
* next member/insertion/deletion and contention should be negligible |
|
1515 |
* if the multi-list is much greater than the number of cpus. |
|
1516 |
*/ |
|
1517 |
void |
|
1518 |
ipcl_globalhash_insert(conn_t *connp) |
|
1519 |
{ |
|
1520 |
int index; |
|
1521 |
||
1522 |
/* |
|
1523 |
* No need for atomic here. Approximate even distribution |
|
1524 |
* in the global lists is sufficient. |
|
1525 |
*/ |
|
1526 |
conn_g_index++; |
|
1527 |
index = conn_g_index & (CONN_G_HASH_SIZE - 1); |
|
1528 |
||
1529 |
connp->conn_g_prev = NULL; |
|
1530 |
/* |
|
1531 |
* Mark as INCIPIENT, so that walkers will ignore this |
|
1532 |
* for now, till ip_open is ready to make it visible globally. |
|
1533 |
*/ |
|
1534 |
connp->conn_state_flags |= CONN_INCIPIENT; |
|
1535 |
||
1536 |
/* Insert at the head of the list */ |
|
1537 |
mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); |
|
1538 |
connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; |
|
1539 |
if (connp->conn_g_next != NULL) |
|
1540 |
connp->conn_g_next->conn_g_prev = connp; |
|
1541 |
ipcl_globalhash_fanout[index].connf_head = connp; |
|
1542 |
||
1543 |
/* The fanout bucket this conn points to */ |
|
1544 |
connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; |
|
1545 |
||
1546 |
mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); |
|
1547 |
} |
|
1548 |
||
1549 |
void |
|
1550 |
ipcl_globalhash_remove(conn_t *connp) |
|
1551 |
{ |
|
1552 |
/* |
|
1553 |
* We were never inserted in the global multi list. |
|
1554 |
* IPCL_NONE variety is never inserted in the global multilist |
|
1555 |
* since it is presumed to not need any cleanup and is transient. |
|
1556 |
*/ |
|
1557 |
if (connp->conn_g_fanout == NULL) |
|
1558 |
return; |
|
1559 |
||
1560 |
mutex_enter(&connp->conn_g_fanout->connf_lock); |
|
1561 |
if (connp->conn_g_prev != NULL) |
|
1562 |
connp->conn_g_prev->conn_g_next = connp->conn_g_next; |
|
1563 |
else |
|
1564 |
connp->conn_g_fanout->connf_head = connp->conn_g_next; |
|
1565 |
if (connp->conn_g_next != NULL) |
|
1566 |
connp->conn_g_next->conn_g_prev = connp->conn_g_prev; |
|
1567 |
mutex_exit(&connp->conn_g_fanout->connf_lock); |
|
1568 |
||
1569 |
/* Better to stumble on a null pointer than to corrupt memory */ |
|
1570 |
connp->conn_g_next = NULL; |
|
1571 |
connp->conn_g_prev = NULL; |
|
1572 |
} |
|
1573 |
||
1574 |
/* |
|
1575 |
* Walk the list of all conn_t's in the system, calling the function provided |
|
1576 |
* with the specified argument for each. |
|
1577 |
* Applies to both IPv4 and IPv6. |
|
1578 |
* |
|
1579 |
* IPCs may hold pointers to ipif/ill. To guard against stale pointers |
|
1580 |
* ipcl_walk() is called to cleanup the conn_t's, typically when an interface is |
|
1581 |
* unplumbed or removed. New conn_t's that are created while we are walking |
|
1582 |
* may be missed by this walk, because they are not necessarily inserted |
|
1583 |
* at the tail of the list. They are new conn_t's and thus don't have any |
|
1584 |
* stale pointers. The CONN_CLOSING flag ensures that no new reference |
|
1585 |
* is created to the struct that is going away. |
|
1586 |
*/ |
|
1587 |
void |
|
1588 |
ipcl_walk(pfv_t func, void *arg) |
|
1589 |
{ |
|
1590 |
int i; |
|
1591 |
conn_t *connp; |
|
1592 |
conn_t *prev_connp; |
|
1593 |
||
1594 |
for (i = 0; i < CONN_G_HASH_SIZE; i++) { |
|
1595 |
mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); |
|
1596 |
prev_connp = NULL; |
|
1597 |
connp = ipcl_globalhash_fanout[i].connf_head; |
|
1598 |
while (connp != NULL) { |
|
1599 |
mutex_enter(&connp->conn_lock); |
|
1600 |
if (connp->conn_state_flags & |
|
1601 |
(CONN_CONDEMNED | CONN_INCIPIENT)) { |
|
1602 |
mutex_exit(&connp->conn_lock); |
|
1603 |
connp = connp->conn_g_next; |
|
1604 |
continue; |
|
1605 |
} |
|
1606 |
CONN_INC_REF_LOCKED(connp); |
|
1607 |
mutex_exit(&connp->conn_lock); |
|
1608 |
mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); |
|
1609 |
(*func)(connp, arg); |
|
1610 |
if (prev_connp != NULL) |
|
1611 |
CONN_DEC_REF(prev_connp); |
|
1612 |
mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); |
|
1613 |
prev_connp = connp; |
|
1614 |
connp = connp->conn_g_next; |
|
1615 |
} |
|
1616 |
mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); |
|
1617 |
if (prev_connp != NULL) |
|
1618 |
CONN_DEC_REF(prev_connp); |
|
1619 |
} |
|
1620 |
} |
|
1621 |
||
1622 |
/* |
|
1623 |
* Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on |
|
1624 |
* the {src, dst, lport, fport} quadruplet. Returns with conn reference |
|
1625 |
* held; caller must call CONN_DEC_REF. Only checks for connected entries |
|
1626 |
* (peer tcp in at least ESTABLISHED state). |
|
1627 |
*/ |
|
1628 |
conn_t * |
|
1629 |
ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) |
|
1630 |
{ |
|
1631 |
uint32_t ports; |
|
1632 |
uint16_t *pports = (uint16_t *)&ports; |
|
1633 |
connf_t *connfp; |
|
1634 |
conn_t *tconnp; |
|
1635 |
boolean_t zone_chk; |
|
1636 |
||
1637 |
/* |
|
1638 |
* If either the source of destination address is loopback, then |
|
1639 |
* both endpoints must be in the same Zone. Otherwise, both of |
|
1640 |
* the addresses are system-wide unique (tcp is in ESTABLISHED |
|
1641 |
* state) and the endpoints may reside in different Zones. |
|
1642 |
*/ |
|
1643 |
zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || |
|
1644 |
ipha->ipha_dst == htonl(INADDR_LOOPBACK)); |
|
1645 |
||
1646 |
bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); |
|
1647 |
bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); |
|
1648 |
||
1649 |
connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; |
|
1650 |
||
1651 |
mutex_enter(&connfp->connf_lock); |
|
1652 |
for (tconnp = connfp->connf_head; tconnp != NULL; |
|
1653 |
tconnp = tconnp->conn_next) { |
|
1654 |
||
1655 |
if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, |
|
1656 |
ipha->ipha_dst, ipha->ipha_src, ports) && |
|
1657 |
tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && |
|
1658 |
(!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { |
|
1659 |
||
1660 |
ASSERT(tconnp != connp); |
|
1661 |
CONN_INC_REF(tconnp); |
|
1662 |
mutex_exit(&connfp->connf_lock); |
|
1663 |
return (tconnp); |
|
1664 |
} |
|
1665 |
} |
|
1666 |
mutex_exit(&connfp->connf_lock); |
|
1667 |
return (NULL); |
|
1668 |
} |
|
1669 |
||
1670 |
/* |
|
1671 |
* Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on |
|
1672 |
* the {src, dst, lport, fport} quadruplet. Returns with conn reference |
|
1673 |
* held; caller must call CONN_DEC_REF. Only checks for connected entries |
|
1674 |
* (peer tcp in at least ESTABLISHED state). |
|
1675 |
*/ |
|
1676 |
conn_t * |
|
1677 |
ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) |
|
1678 |
{ |
|
1679 |
uint32_t ports; |
|
1680 |
uint16_t *pports = (uint16_t *)&ports; |
|
1681 |
connf_t *connfp; |
|
1682 |
conn_t *tconnp; |
|
1683 |
boolean_t zone_chk; |
|
1684 |
||
1685 |
/* |
|
1686 |
* If either the source of destination address is loopback, then |
|
1687 |
* both endpoints must be in the same Zone. Otherwise, both of |
|
1688 |
* the addresses are system-wide unique (tcp is in ESTABLISHED |
|
1689 |
* state) and the endpoints may reside in different Zones. We |
|
1690 |
* don't do Zone check for link local address(es) because the |
|
1691 |
* current Zone implementation treats each link local address as |
|
1692 |
* being unique per system node, i.e. they belong to global Zone. |
|
1693 |
*/ |
|
1694 |
zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || |
|
1695 |
IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); |
|
1696 |
||
1697 |
bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); |
|
1698 |
bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); |
|
1699 |
||
1700 |
connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; |
|
1701 |
||
1702 |
mutex_enter(&connfp->connf_lock); |
|
1703 |
for (tconnp = connfp->connf_head; tconnp != NULL; |
|
1704 |
tconnp = tconnp->conn_next) { |
|
1705 |
||
1706 |
/* We skip tcp_bound_if check here as this is loopback tcp */ |
|
1707 |
if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, |
|
1708 |
ip6h->ip6_dst, ip6h->ip6_src, ports) && |
|
1709 |
tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && |
|
1710 |
(!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { |
|
1711 |
||
1712 |
ASSERT(tconnp != connp); |
|
1713 |
CONN_INC_REF(tconnp); |
|
1714 |
mutex_exit(&connfp->connf_lock); |
|
1715 |
return (tconnp); |
|
1716 |
} |
|
1717 |
} |
|
1718 |
mutex_exit(&connfp->connf_lock); |
|
1719 |
return (NULL); |
|
1720 |
} |
|
1721 |
||
1722 |
/* |
|
1723 |
* Find an exact {src, dst, lport, fport} match for a bounced datagram. |
|
1724 |
* Returns with conn reference held. Caller must call CONN_DEC_REF. |
|
1725 |
* Only checks for connected entries i.e. no INADDR_ANY checks. |
|
1726 |
*/ |
|
1727 |
conn_t * |
|
1728 |
ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) |
|
1729 |
{ |
|
1730 |
uint32_t ports; |
|
1731 |
uint16_t *pports; |
|
1732 |
connf_t *connfp; |
|
1733 |
conn_t *tconnp; |
|
1734 |
||
1735 |
pports = (uint16_t *)&ports; |
|
1736 |
bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); |
|
1737 |
bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); |
|
1738 |
||
1739 |
connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; |
|
1740 |
||
1741 |
mutex_enter(&connfp->connf_lock); |
|
1742 |
for (tconnp = connfp->connf_head; tconnp != NULL; |
|
1743 |
tconnp = tconnp->conn_next) { |
|
1744 |
||
1745 |
if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, |
|
1746 |
ipha->ipha_dst, ipha->ipha_src, ports) && |
|
1747 |
tconnp->conn_tcp->tcp_state >= min_state) { |
|
1748 |
||
1749 |
CONN_INC_REF(tconnp); |
|
1750 |
mutex_exit(&connfp->connf_lock); |
|
1751 |
return (tconnp); |
|
1752 |
} |
|
1753 |
} |
|
1754 |
mutex_exit(&connfp->connf_lock); |
|
1755 |
return (NULL); |
|
1756 |
} |
|
1757 |
||
1758 |
/* |
|
1759 |
* Find an exact {src, dst, lport, fport} match for a bounced datagram. |
|
1760 |
* Returns with conn reference held. Caller must call CONN_DEC_REF. |
|
1761 |
* Only checks for connected entries i.e. no INADDR_ANY checks. |
|
1762 |
* Match on ifindex in addition to addresses. |
|
1763 |
*/ |
|
1764 |
conn_t * |
|
1765 |
ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, |
|
1766 |
uint_t ifindex) |
|
1767 |
{ |
|
1768 |
tcp_t *tcp; |
|
1769 |
uint32_t ports; |
|
1770 |
uint16_t *pports; |
|
1771 |
connf_t *connfp; |
|
1772 |
conn_t *tconnp; |
|
1773 |
||
1774 |
pports = (uint16_t *)&ports; |
|
1775 |
pports[0] = tcpha->tha_fport; |
|
1776 |
pports[1] = tcpha->tha_lport; |
|
1777 |
||
1778 |
connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; |
|
1779 |
||
1780 |
mutex_enter(&connfp->connf_lock); |
|
1781 |
for (tconnp = connfp->connf_head; tconnp != NULL; |
|
1782 |
tconnp = tconnp->conn_next) { |
|
1783 |
||
1784 |
tcp = tconnp->conn_tcp; |
|
1785 |
if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, |
|
1786 |
ip6h->ip6_dst, ip6h->ip6_src, ports) && |
|
1787 |
tcp->tcp_state >= min_state && |
|
1788 |
(tcp->tcp_bound_if == 0 || |
|
1789 |
tcp->tcp_bound_if == ifindex)) { |
|
1790 |
||
1791 |
CONN_INC_REF(tconnp); |
|
1792 |
mutex_exit(&connfp->connf_lock); |
|
1793 |
return (tconnp); |
|
1794 |
} |
|
1795 |
} |
|
1796 |
mutex_exit(&connfp->connf_lock); |
|
1797 |
return (NULL); |
|
1798 |
} |
|
1799 |
||
1800 |
/* |
|
1801 |
* To find a TCP listening connection matching the incoming segment. |
|
1802 |
*/ |
|
1803 |
conn_t * |
|
1804 |
ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) |
|
1805 |
{ |
|
1806 |
connf_t *bind_connfp; |
|
1807 |
conn_t *connp; |
|
1808 |
tcp_t *tcp; |
|
1809 |
||
1810 |
/* |
|
1811 |
* Avoid false matches for packets sent to an IP destination of |
|
1812 |
* all zeros. |
|
1813 |
*/ |
|
1814 |
if (laddr == 0) |
|
1815 |
return (NULL); |
|
1816 |
||
1817 |
bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; |
|
1818 |
mutex_enter(&bind_connfp->connf_lock); |
|
1819 |
for (connp = bind_connfp->connf_head; connp != NULL; |
|
1820 |
connp = connp->conn_next) { |
|
1821 |
tcp = connp->conn_tcp; |
|
1822 |
if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && |
|
1823 |
connp->conn_zoneid == zoneid && |
|
1824 |
(tcp->tcp_listener == NULL)) { |
|
1825 |
CONN_INC_REF(connp); |
|
1826 |
mutex_exit(&bind_connfp->connf_lock); |
|
1827 |
return (connp); |
|
1828 |
} |
|
1829 |
} |
|
1830 |
mutex_exit(&bind_connfp->connf_lock); |
|
1831 |
return (NULL); |
|
1832 |
} |
|
1833 |
||
1834 |
||
1835 |
conn_t * |
|
1836 |
ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, |
|
1837 |
zoneid_t zoneid) |
|
1838 |
{ |
|
1839 |
connf_t *bind_connfp; |
|
1840 |
conn_t *connp = NULL; |
|
1841 |
tcp_t *tcp; |
|
1842 |
||
1843 |
/* |
|
1844 |
* Avoid false matches for packets sent to an IP destination of |
|
1845 |
* all zeros. |
|
1846 |
*/ |
|
1847 |
if (IN6_IS_ADDR_UNSPECIFIED(laddr)) |
|
1848 |
return (NULL); |
|
1849 |
||
1850 |
||
1851 |
bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; |
|
1852 |
mutex_enter(&bind_connfp->connf_lock); |
|
1853 |
for (connp = bind_connfp->connf_head; connp != NULL; |
|
1854 |
connp = connp->conn_next) { |
|
1855 |
tcp = connp->conn_tcp; |
|
1856 |
if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && |
|
1857 |
connp->conn_zoneid == zoneid && |
|
1858 |
(tcp->tcp_bound_if == 0 || |
|
1859 |
tcp->tcp_bound_if == ifindex) && |
|
1860 |
tcp->tcp_listener == NULL) { |
|
1861 |
CONN_INC_REF(connp); |
|
1862 |
mutex_exit(&bind_connfp->connf_lock); |
|
1863 |
return (connp); |
|
1864 |
} |
|
1865 |
} |
|
1866 |
mutex_exit(&bind_connfp->connf_lock); |
|
1867 |
return (NULL); |
|
1868 |
} |
|
1869 |
||
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1870 |
/* |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1871 |
* ipcl_get_next_conn |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1872 |
* get the next entry in the conn global list |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1873 |
* and put a reference on the next_conn. |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1874 |
* decrement the reference on the current conn. |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1875 |
* |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1876 |
* This is an iterator based walker function that also provides for |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1877 |
* some selection by the caller. It walks through the conn_hash bucket |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1878 |
* searching for the next valid connp in the list, and selects connections |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1879 |
* that are neither closed nor condemned. It also REFHOLDS the conn |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1880 |
* thus ensuring that the conn exists when the caller uses the conn. |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1881 |
*/ |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1882 |
conn_t * |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1883 |
ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1884 |
{ |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1885 |
conn_t *next_connp; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1886 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1887 |
if (connfp == NULL) |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1888 |
return (NULL); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1889 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1890 |
mutex_enter(&connfp->connf_lock); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1891 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1892 |
next_connp = (connp == NULL) ? |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1893 |
connfp->connf_head : connp->conn_g_next; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1894 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1895 |
while (next_connp != NULL) { |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1896 |
mutex_enter(&next_connp->conn_lock); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1897 |
if (!(next_connp->conn_flags & conn_flags) || |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1898 |
(next_connp->conn_state_flags & |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1899 |
(CONN_CONDEMNED | CONN_INCIPIENT))) { |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1900 |
/* |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1901 |
* This conn has been condemned or |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1902 |
* is closing, or the flags don't match |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1903 |
*/ |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1904 |
mutex_exit(&next_connp->conn_lock); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1905 |
next_connp = next_connp->conn_g_next; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1906 |
continue; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1907 |
} |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1908 |
CONN_INC_REF_LOCKED(next_connp); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1909 |
mutex_exit(&next_connp->conn_lock); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1910 |
break; |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1911 |
} |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1912 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1913 |
mutex_exit(&connfp->connf_lock); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1914 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1915 |
if (connp != NULL) |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1916 |
CONN_DEC_REF(connp); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1917 |
|
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1918 |
return (next_connp); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1919 |
} |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
409
diff
changeset
|
1920 |
|
0 | 1921 |
#ifdef CONN_DEBUG |
1922 |
/* |
|
1923 |
* Trace of the last NBUF refhold/refrele |
|
1924 |
*/ |
|
1925 |
int |
|
1926 |
conn_trace_ref(conn_t *connp) |
|
1927 |
{ |
|
1928 |
int last; |
|
1929 |
conn_trace_t *ctb; |
|
1930 |
||
1931 |
ASSERT(MUTEX_HELD(&connp->conn_lock)); |
|
1932 |
last = connp->conn_trace_last; |
|
1933 |
last++; |
|
1934 |
if (last == CONN_TRACE_MAX) |
|
1935 |
last = 0; |
|
1936 |
||
1937 |
ctb = &connp->conn_trace_buf[last]; |
|
1938 |
ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); |
|
1939 |
connp->conn_trace_last = last; |
|
1940 |
return (1); |
|
1941 |
} |
|
1942 |
||
1943 |
int |
|
1944 |
conn_untrace_ref(conn_t *connp) |
|
1945 |
{ |
|
1946 |
int last; |
|
1947 |
conn_trace_t *ctb; |
|
1948 |
||
1949 |
ASSERT(MUTEX_HELD(&connp->conn_lock)); |
|
1950 |
last = connp->conn_trace_last; |
|
1951 |
last++; |
|
1952 |
if (last == CONN_TRACE_MAX) |
|
1953 |
last = 0; |
|
1954 |
||
1955 |
ctb = &connp->conn_trace_buf[last]; |
|
1956 |
ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); |
|
1957 |
connp->conn_trace_last = last; |
|
1958 |
return (1); |
|
1959 |
} |
|
1960 |
#endif |