author | masputra |
Sat, 22 Oct 2005 22:50:14 -0700 | |
changeset 741 | 40027a3621ac |
parent 0 | 68f95e015346 |
child 980 | 253cff0301e4 |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
5 |
* Common Development and Distribution License, Version 1.0 only |
|
6 |
* (the "License"). You may not use this file except in compliance |
|
7 |
* with the License. |
|
8 |
* |
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 |
* or http://www.opensolaris.org/os/licensing. |
|
11 |
* See the License for the specific language governing permissions |
|
12 |
* and limitations under the License. |
|
13 |
* |
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 |
* If applicable, add the following below this CDDL HEADER, with the |
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 |
* |
|
20 |
* CDDL HEADER END |
|
21 |
*/ |
|
22 |
/* |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
23 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
0 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
#include <sys/types.h> |
|
30 |
#include <sys/stream.h> |
|
31 |
#include <sys/stropts.h> |
|
32 |
#include <sys/sysmacros.h> |
|
33 |
#include <sys/errno.h> |
|
34 |
#include <sys/strlog.h> |
|
35 |
#include <sys/dlpi.h> |
|
36 |
#include <sys/sockio.h> |
|
37 |
#include <sys/tiuser.h> |
|
38 |
#include <sys/tihdr.h> |
|
39 |
#include <sys/socket.h> |
|
40 |
#include <sys/ddi.h> |
|
41 |
#include <sys/cmn_err.h> |
|
42 |
#include <sys/debug.h> |
|
43 |
#include <sys/vtrace.h> |
|
44 |
#include <sys/kmem.h> |
|
45 |
#include <sys/zone.h> |
|
46 |
||
47 |
#include <net/if.h> |
|
48 |
#include <net/if_types.h> |
|
49 |
#include <net/if_dl.h> |
|
50 |
#include <net/route.h> |
|
51 |
#include <sys/sockio.h> |
|
52 |
#include <netinet/in.h> |
|
53 |
#include <netinet/in_systm.h> |
|
54 |
#include <netinet/ip6.h> |
|
55 |
#include <netinet/icmp6.h> |
|
56 |
||
57 |
#include <inet/common.h> |
|
58 |
#include <inet/mi.h> |
|
59 |
#include <inet/mib2.h> |
|
60 |
#include <inet/nd.h> |
|
61 |
#include <inet/arp.h> |
|
62 |
#include <inet/ip.h> |
|
63 |
#include <inet/ip_multi.h> |
|
64 |
#include <inet/ip_if.h> |
|
65 |
#include <inet/ip_ire.h> |
|
66 |
#include <inet/ip_rts.h> |
|
67 |
#include <inet/ip6.h> |
|
68 |
#include <inet/ip_ndp.h> |
|
69 |
#include <inet/ipsec_impl.h> |
|
70 |
#include <inet/ipsec_info.h> |
|
71 |
||
72 |
/* |
|
73 |
* Function names with nce_ prefix are static while function |
|
74 |
* names with ndp_ prefix are used by rest of the IP. |
|
75 |
*/ |
|
76 |
||
77 |
static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, |
|
78 |
uint32_t ll_addr_len); |
|
79 |
static void nce_fastpath(nce_t *nce); |
|
80 |
static void nce_ire_delete(nce_t *nce); |
|
81 |
static void nce_ire_delete1(ire_t *ire, char *nce_arg); |
|
82 |
static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); |
|
83 |
static nce_t *nce_lookup_addr(ill_t *ill, const in6_addr_t *addr); |
|
84 |
static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); |
|
85 |
static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, |
|
86 |
uchar_t *addr); |
|
87 |
static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); |
|
88 |
static void nce_queue_mp(nce_t *nce, mblk_t *mp); |
|
89 |
static void nce_report1(nce_t *nce, uchar_t *mp_arg); |
|
90 |
static mblk_t *nce_udreq_alloc(ill_t *ill); |
|
91 |
static void nce_update(nce_t *nce, uint16_t new_state, |
|
92 |
uchar_t *new_ll_addr); |
|
93 |
static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); |
|
94 |
static boolean_t nce_xmit(ill_t *ill, uint32_t operation, |
|
95 |
ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, |
|
96 |
const in6_addr_t *target, int flag); |
|
97 |
static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); |
|
98 |
extern void th_trace_rrecord(th_trace_t *); |
|
99 |
||
100 |
#ifdef NCE_DEBUG |
|
101 |
void nce_trace_inactive(nce_t *); |
|
102 |
#endif |
|
103 |
||
104 |
/* NDP Cache Entry Hash Table */ |
|
105 |
#define NCE_TABLE_SIZE 256 |
|
106 |
static nce_t *nce_hash_tbl[NCE_TABLE_SIZE]; |
|
107 |
static nce_t *nce_mask_entries; /* mask not all ones */ |
|
108 |
static int ndp_g_walker = 0; /* # of active thread */ |
|
109 |
/* walking nce hash list */ |
|
110 |
/* ndp_g_walker_cleanup will be true when deletion has to be deferred */ |
|
111 |
static boolean_t ndp_g_walker_cleanup = B_FALSE; |
|
112 |
||
113 |
/*
 * True when addr is a solicited-node multicast address, i.e. one of the
 * form ff02:0:0:0:0:1:ffXX:XXXX.  The first three 32-bit words must match
 * exactly; only the low 24 bits of the last word are free.
 *
 * Word 0 is compared for equality rather than masked: testing
 * "(w & 0xff020000) == 0xff020000" only verifies that those bits are set
 * and would also accept prefixes such as ff12:: or ffff:ffff::.
 *
 * s6_addr32[] holds network-order data, so the constants are spelled once
 * per host byte order.
 */
#ifdef _BIG_ENDIAN
#define	IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \
	(((addr)->s6_addr32[0] == 0xff020000) && \
	((addr)->s6_addr32[1] == 0x0) && \
	((addr)->s6_addr32[2] == 0x00000001) && \
	(((addr)->s6_addr32[3] & 0xff000000) == 0xff000000))
#else	/* _BIG_ENDIAN */
#define	IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \
	(((addr)->s6_addr32[0] == 0x000002ff) && \
	((addr)->s6_addr32[1] == 0x0) && \
	((addr)->s6_addr32[2] == 0x01000000) && \
	(((addr)->s6_addr32[3] & 0x000000ff) == 0x000000ff))
#endif
|
126 |
||
127 |
#define NCE_HASH_PTR(addr) \ |
|
128 |
(&(nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) |
|
129 |
||
130 |
/* |
|
131 |
* NDP Cache Entry creation routine. |
|
132 |
* Mapped entries will never do NUD . |
|
133 |
* This routine must always be called with ndp_g_lock held. |
|
134 |
* Prior to return, nce_refcnt is incremented. |
|
135 |
*/ |
|
136 |
int |
|
137 |
ndp_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, |
|
138 |
const in6_addr_t *mask, const in6_addr_t *extract_mask, |
|
139 |
uint32_t hw_extract_start, uint16_t flags, uint16_t state, |
|
140 |
nce_t **newnce) |
|
141 |
{ |
|
142 |
static nce_t nce_nil; |
|
143 |
nce_t *nce; |
|
144 |
mblk_t *mp; |
|
145 |
mblk_t *template; |
|
146 |
nce_t **ncep; |
|
147 |
boolean_t dropped = B_FALSE; |
|
148 |
||
149 |
ASSERT(MUTEX_HELD(&ndp_g_lock)); |
|
150 |
ASSERT(ill != NULL); |
|
151 |
if (IN6_IS_ADDR_UNSPECIFIED(addr)) { |
|
152 |
ip0dbg(("ndp_add: no addr\n")); |
|
153 |
return (EINVAL); |
|
154 |
} |
|
155 |
if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { |
|
156 |
ip0dbg(("ndp_add: flags = %x\n", (int)flags)); |
|
157 |
return (EINVAL); |
|
158 |
} |
|
159 |
if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && |
|
160 |
(flags & NCE_F_MAPPING)) { |
|
161 |
ip0dbg(("ndp_add: extract mask zero for mapping")); |
|
162 |
return (EINVAL); |
|
163 |
} |
|
164 |
/* |
|
165 |
* Allocate the mblk to hold the nce. |
|
166 |
* |
|
167 |
* XXX This can come out of a separate cache - nce_cache. |
|
168 |
* We don't need the mp anymore as there are no more |
|
169 |
* "qwriter"s |
|
170 |
*/ |
|
171 |
mp = allocb(sizeof (nce_t), BPRI_MED); |
|
172 |
if (mp == NULL) |
|
173 |
return (ENOMEM); |
|
174 |
||
175 |
nce = (nce_t *)mp->b_rptr; |
|
176 |
mp->b_wptr = (uchar_t *)&nce[1]; |
|
177 |
*nce = nce_nil; |
|
178 |
||
179 |
/* |
|
180 |
* This one holds link layer address |
|
181 |
*/ |
|
182 |
if (ill->ill_net_type == IRE_IF_RESOLVER) { |
|
183 |
template = nce_udreq_alloc(ill); |
|
184 |
} else { |
|
185 |
ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); |
|
186 |
ASSERT((ill->ill_resolver_mp != NULL)); |
|
187 |
template = copyb(ill->ill_resolver_mp); |
|
188 |
} |
|
189 |
if (template == NULL) { |
|
190 |
freeb(mp); |
|
191 |
return (ENOMEM); |
|
192 |
} |
|
193 |
nce->nce_ill = ill; |
|
194 |
nce->nce_flags = flags; |
|
195 |
nce->nce_state = state; |
|
196 |
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; |
|
197 |
nce->nce_rcnt = ill->ill_xmit_count; |
|
198 |
nce->nce_addr = *addr; |
|
199 |
nce->nce_mask = *mask; |
|
200 |
nce->nce_extract_mask = *extract_mask; |
|
201 |
nce->nce_ll_extract_start = hw_extract_start; |
|
202 |
nce->nce_fp_mp = NULL; |
|
203 |
nce->nce_res_mp = template; |
|
204 |
if (state == ND_REACHABLE) |
|
205 |
nce->nce_last = TICK_TO_MSEC(lbolt64); |
|
206 |
else |
|
207 |
nce->nce_last = 0; |
|
208 |
nce->nce_qd_mp = NULL; |
|
209 |
nce->nce_mp = mp; |
|
210 |
if (hw_addr != NULL) |
|
211 |
nce_set_ll(nce, hw_addr); |
|
212 |
/* This one is for nce getting created */ |
|
213 |
nce->nce_refcnt = 1; |
|
214 |
mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); |
|
215 |
if (nce->nce_flags & NCE_F_MAPPING) { |
|
216 |
ASSERT(IN6_IS_ADDR_MULTICAST(addr)); |
|
217 |
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); |
|
218 |
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); |
|
219 |
ncep = &nce_mask_entries; |
|
220 |
} else { |
|
221 |
ncep = ((nce_t **)NCE_HASH_PTR(*addr)); |
|
222 |
} |
|
223 |
||
224 |
#ifdef NCE_DEBUG |
|
225 |
bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); |
|
226 |
#endif |
|
227 |
/* |
|
228 |
* Atomically ensure that the ill is not CONDEMNED, before |
|
229 |
* adding the NCE. |
|
230 |
*/ |
|
231 |
mutex_enter(&ill->ill_lock); |
|
232 |
if (ill->ill_state_flags & ILL_CONDEMNED) { |
|
233 |
mutex_exit(&ill->ill_lock); |
|
234 |
freeb(mp); |
|
235 |
return (EINVAL); |
|
236 |
} |
|
237 |
if ((nce->nce_next = *ncep) != NULL) |
|
238 |
nce->nce_next->nce_ptpn = &nce->nce_next; |
|
239 |
*ncep = nce; |
|
240 |
nce->nce_ptpn = ncep; |
|
241 |
*newnce = nce; |
|
242 |
/* This one is for nce being used by an active thread */ |
|
243 |
NCE_REFHOLD(*newnce); |
|
244 |
||
245 |
/* Bump up the number of nce's referencing this ill */ |
|
246 |
ill->ill_nce_cnt++; |
|
247 |
mutex_exit(&ill->ill_lock); |
|
248 |
||
249 |
/* |
|
250 |
* Before we insert the nce, honor the UNSOL_ADV flag. |
|
251 |
* We cannot hold the ndp_g_lock and call nce_xmit |
|
252 |
* which does a putnext. |
|
253 |
*/ |
|
254 |
if (flags & NCE_F_UNSOL_ADV) { |
|
255 |
flags |= NDP_ORIDE; |
|
256 |
/* |
|
257 |
* We account for the transmit below by assigning one |
|
258 |
* less than the ndd variable. Subsequent decrements |
|
259 |
* are done in ndp_timer. |
|
260 |
*/ |
|
261 |
mutex_enter(&nce->nce_lock); |
|
262 |
mutex_exit(&ndp_g_lock); |
|
263 |
nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; |
|
264 |
mutex_exit(&nce->nce_lock); |
|
265 |
dropped = nce_xmit(ill, |
|
266 |
ND_NEIGHBOR_ADVERT, |
|
267 |
ill, /* ill to be used for extracting ill_nd_lla */ |
|
268 |
B_TRUE, /* use ill_nd_lla */ |
|
269 |
addr, /* Source and target of the advertisement pkt */ |
|
270 |
&ipv6_all_hosts_mcast, /* Destination of the packet */ |
|
271 |
flags); |
|
272 |
mutex_enter(&nce->nce_lock); |
|
273 |
if (dropped) |
|
274 |
nce->nce_unsolicit_count++; |
|
275 |
if (nce->nce_unsolicit_count != 0) { |
|
276 |
nce->nce_timeout_id = timeout(ndp_timer, nce, |
|
277 |
MSEC_TO_TICK(ip_ndp_unsolicit_interval)); |
|
278 |
} |
|
279 |
mutex_exit(&nce->nce_lock); |
|
280 |
mutex_enter(&ndp_g_lock); |
|
281 |
} |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
282 |
/* |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
283 |
* If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
284 |
* we call nce_fastpath as soon as the nce is resolved in ndp_process. |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
285 |
* We call nce_fastpath from nce_update if the link layer address of |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
286 |
* the peer changes from nce_update |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
287 |
*/ |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
288 |
if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
289 |
nce_fastpath(nce); |
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
290 |
return (0); |
0 | 291 |
} |
292 |
||
293 |
int |
|
294 |
ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, |
|
295 |
const in6_addr_t *mask, const in6_addr_t *extract_mask, |
|
296 |
uint32_t hw_extract_start, uint16_t flags, uint16_t state, |
|
297 |
nce_t **newnce) |
|
298 |
{ |
|
299 |
int err = 0; |
|
300 |
nce_t *nce; |
|
301 |
||
302 |
mutex_enter(&ndp_g_lock); |
|
303 |
nce = nce_lookup_addr(ill, addr); |
|
304 |
if (nce == NULL) { |
|
305 |
err = ndp_add(ill, |
|
306 |
hw_addr, |
|
307 |
addr, |
|
308 |
mask, |
|
309 |
extract_mask, |
|
310 |
hw_extract_start, |
|
311 |
flags, |
|
312 |
state, |
|
313 |
newnce); |
|
314 |
} else { |
|
315 |
*newnce = nce; |
|
316 |
err = EEXIST; |
|
317 |
} |
|
318 |
mutex_exit(&ndp_g_lock); |
|
319 |
return (err); |
|
320 |
} |
|
321 |
||
322 |
/* |
|
323 |
* Remove all the CONDEMNED nces from the appropriate hash table. |
|
324 |
* We create a private list of NCEs, these may have ires pointing |
|
325 |
* to them, so the list will be passed through to clean up dependent |
|
326 |
* ires and only then we can do NCE_REFRELE which can make NCE inactive. |
|
327 |
*/ |
|
328 |
static void |
|
329 |
nce_remove(nce_t *nce, nce_t **free_nce_list) |
|
330 |
{ |
|
331 |
nce_t *nce1; |
|
332 |
nce_t **ptpn; |
|
333 |
||
334 |
ASSERT(MUTEX_HELD(&ndp_g_lock)); |
|
335 |
ASSERT(ndp_g_walker == 0); |
|
336 |
for (; nce; nce = nce1) { |
|
337 |
nce1 = nce->nce_next; |
|
338 |
mutex_enter(&nce->nce_lock); |
|
339 |
if (nce->nce_flags & NCE_F_CONDEMNED) { |
|
340 |
ptpn = nce->nce_ptpn; |
|
341 |
nce1 = nce->nce_next; |
|
342 |
if (nce1 != NULL) |
|
343 |
nce1->nce_ptpn = ptpn; |
|
344 |
*ptpn = nce1; |
|
345 |
nce->nce_ptpn = NULL; |
|
346 |
nce->nce_next = NULL; |
|
347 |
nce->nce_next = *free_nce_list; |
|
348 |
*free_nce_list = nce; |
|
349 |
} |
|
350 |
mutex_exit(&nce->nce_lock); |
|
351 |
} |
|
352 |
} |
|
353 |
||
354 |
/* |
|
355 |
* 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() |
|
356 |
* will return this NCE. Also no new IREs will be created that |
|
357 |
* point to this NCE (See ire_add_v6). Also no new timeouts will |
|
358 |
* be started (See NDP_RESTART_TIMER). |
|
359 |
* 2. Cancel any currently running timeouts. |
|
360 |
* 3. If there is an ndp walker, return. The walker will do the cleanup. |
|
361 |
* This ensures that walkers see a consistent list of NCEs while walking. |
|
362 |
* 4. Otherwise remove the NCE from the list of NCEs |
|
363 |
* 5. Delete all IREs pointing to this NCE. |
|
364 |
*/ |
|
365 |
void |
|
366 |
ndp_delete(nce_t *nce) |
|
367 |
{ |
|
368 |
nce_t **ptpn; |
|
369 |
nce_t *nce1; |
|
370 |
||
371 |
/* Serialize deletes */ |
|
372 |
mutex_enter(&nce->nce_lock); |
|
373 |
if (nce->nce_flags & NCE_F_CONDEMNED) { |
|
374 |
/* Some other thread is doing the delete */ |
|
375 |
mutex_exit(&nce->nce_lock); |
|
376 |
return; |
|
377 |
} |
|
378 |
/* |
|
379 |
* Caller has a refhold. Also 1 ref for being in the list. Thus |
|
380 |
* refcnt has to be >= 2 |
|
381 |
*/ |
|
382 |
ASSERT(nce->nce_refcnt >= 2); |
|
383 |
nce->nce_flags |= NCE_F_CONDEMNED; |
|
384 |
mutex_exit(&nce->nce_lock); |
|
385 |
||
386 |
nce_fastpath_list_delete(nce); |
|
387 |
||
388 |
/* |
|
389 |
* Cancel any running timer. Timeout can't be restarted |
|
390 |
* since CONDEMNED is set. Can't hold nce_lock across untimeout. |
|
391 |
* Passing invalid timeout id is fine. |
|
392 |
*/ |
|
393 |
if (nce->nce_timeout_id != 0) { |
|
394 |
(void) untimeout(nce->nce_timeout_id); |
|
395 |
nce->nce_timeout_id = 0; |
|
396 |
} |
|
397 |
||
398 |
mutex_enter(&ndp_g_lock); |
|
399 |
if (nce->nce_ptpn == NULL) { |
|
400 |
/* |
|
401 |
* The last ndp walker has already removed this nce from |
|
402 |
* the list after we marked the nce CONDEMNED and before |
|
403 |
* we grabbed the ndp_g_lock. |
|
404 |
*/ |
|
405 |
mutex_exit(&ndp_g_lock); |
|
406 |
return; |
|
407 |
} |
|
408 |
if (ndp_g_walker > 0) { |
|
409 |
/* |
|
410 |
* Can't unlink. The walker will clean up |
|
411 |
*/ |
|
412 |
ndp_g_walker_cleanup = B_TRUE; |
|
413 |
mutex_exit(&ndp_g_lock); |
|
414 |
return; |
|
415 |
} |
|
416 |
||
417 |
/* |
|
418 |
* Now remove the nce from the list. NDP_RESTART_TIMER won't restart |
|
419 |
* the timer since it is marked CONDEMNED. |
|
420 |
*/ |
|
421 |
ptpn = nce->nce_ptpn; |
|
422 |
nce1 = nce->nce_next; |
|
423 |
if (nce1 != NULL) |
|
424 |
nce1->nce_ptpn = ptpn; |
|
425 |
*ptpn = nce1; |
|
426 |
nce->nce_ptpn = NULL; |
|
427 |
nce->nce_next = NULL; |
|
428 |
mutex_exit(&ndp_g_lock); |
|
429 |
||
430 |
nce_ire_delete(nce); |
|
431 |
} |
|
432 |
||
433 |
void |
|
434 |
ndp_inactive(nce_t *nce) |
|
435 |
{ |
|
436 |
mblk_t **mpp; |
|
437 |
ill_t *ill; |
|
438 |
||
439 |
ASSERT(nce->nce_refcnt == 0); |
|
440 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
441 |
ASSERT(nce->nce_fastpath == NULL); |
|
442 |
||
443 |
/* Free all nce allocated messages */ |
|
444 |
mpp = &nce->nce_first_mp_to_free; |
|
445 |
do { |
|
446 |
while (*mpp != NULL) { |
|
447 |
mblk_t *mp; |
|
448 |
||
449 |
mp = *mpp; |
|
450 |
*mpp = mp->b_next; |
|
451 |
mp->b_next = NULL; |
|
452 |
mp->b_prev = NULL; |
|
453 |
freemsg(mp); |
|
454 |
} |
|
455 |
} while (mpp++ != &nce->nce_last_mp_to_free); |
|
456 |
||
457 |
#ifdef NCE_DEBUG |
|
458 |
nce_trace_inactive(nce); |
|
459 |
#endif |
|
460 |
||
461 |
ill = nce->nce_ill; |
|
462 |
mutex_enter(&ill->ill_lock); |
|
463 |
ill->ill_nce_cnt--; |
|
464 |
/* |
|
465 |
* If the number of nce's associated with this ill have dropped |
|
466 |
* to zero, check whether we need to restart any operation that |
|
467 |
* is waiting for this to happen. |
|
468 |
*/ |
|
469 |
if (ill->ill_nce_cnt == 0) { |
|
470 |
/* ipif_ill_refrele_tail drops the ill_lock */ |
|
471 |
ipif_ill_refrele_tail(ill); |
|
472 |
} else { |
|
473 |
mutex_exit(&ill->ill_lock); |
|
474 |
} |
|
475 |
mutex_destroy(&nce->nce_lock); |
|
476 |
freeb(nce->nce_mp); |
|
477 |
} |
|
478 |
||
479 |
/* |
|
480 |
* ndp_walk routine. Delete the nce if it is associated with the ill |
|
481 |
* that is going away. Always called as a writer. |
|
482 |
*/ |
|
483 |
void |
|
484 |
ndp_delete_per_ill(nce_t *nce, uchar_t *arg) |
|
485 |
{ |
|
486 |
if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { |
|
487 |
ndp_delete(nce); |
|
488 |
} |
|
489 |
} |
|
490 |
||
491 |
/* |
|
492 |
* Walk a list of to be inactive NCEs and blow away all the ires. |
|
493 |
*/ |
|
494 |
static void |
|
495 |
nce_ire_delete_list(nce_t *nce) |
|
496 |
{ |
|
497 |
nce_t *nce_next; |
|
498 |
||
499 |
ASSERT(nce != NULL); |
|
500 |
while (nce != NULL) { |
|
501 |
nce_next = nce->nce_next; |
|
502 |
nce->nce_next = NULL; |
|
503 |
||
504 |
/* |
|
505 |
* It is possible for the last ndp walker (this thread) |
|
506 |
* to come here after ndp_delete has marked the nce CONDEMNED |
|
507 |
* and before it has removed the nce from the fastpath list |
|
508 |
* or called untimeout. So we need to do it here. It is safe |
|
509 |
* for both ndp_delete and this thread to do it twice or |
|
510 |
* even simultaneously since each of the threads has a |
|
511 |
* reference on the nce. |
|
512 |
*/ |
|
513 |
nce_fastpath_list_delete(nce); |
|
514 |
/* |
|
515 |
* Cancel any running timer. Timeout can't be restarted |
|
516 |
* since CONDEMNED is set. Can't hold nce_lock across untimeout. |
|
517 |
* Passing invalid timeout id is fine. |
|
518 |
*/ |
|
519 |
if (nce->nce_timeout_id != 0) { |
|
520 |
(void) untimeout(nce->nce_timeout_id); |
|
521 |
nce->nce_timeout_id = 0; |
|
522 |
} |
|
523 |
||
524 |
ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, |
|
525 |
nce_ire_delete1, (char *)nce, nce->nce_ill); |
|
526 |
NCE_REFRELE_NOTR(nce); |
|
527 |
nce = nce_next; |
|
528 |
} |
|
529 |
} |
|
530 |
||
531 |
/* |
|
532 |
* Delete an ire when the nce goes away. |
|
533 |
*/ |
|
534 |
/* ARGSUSED */ |
|
535 |
static void |
|
536 |
nce_ire_delete(nce_t *nce) |
|
537 |
{ |
|
538 |
ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, |
|
539 |
nce_ire_delete1, (char *)nce, nce->nce_ill); |
|
540 |
NCE_REFRELE_NOTR(nce); |
|
541 |
} |
|
542 |
||
543 |
/* |
|
544 |
* ire_walk routine used to delete every IRE that shares this nce |
|
545 |
*/ |
|
546 |
static void |
|
547 |
nce_ire_delete1(ire_t *ire, char *nce_arg) |
|
548 |
{ |
|
549 |
nce_t *nce = (nce_t *)nce_arg; |
|
550 |
||
551 |
ASSERT(ire->ire_type == IRE_CACHE); |
|
552 |
||
553 |
if (ire->ire_nce == nce) |
|
554 |
ire_delete(ire); |
|
555 |
} |
|
556 |
||
557 |
/* |
|
558 |
* Cache entry lookup. Try to find an nce matching the parameters passed. |
|
559 |
* If one is found, the refcnt on the nce will be incremented. |
|
560 |
*/ |
|
561 |
nce_t * |
|
562 |
ndp_lookup(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) |
|
563 |
{ |
|
564 |
nce_t *nce; |
|
565 |
||
566 |
if (!caller_holds_lock) |
|
567 |
mutex_enter(&ndp_g_lock); |
|
568 |
nce = nce_lookup_addr(ill, addr); |
|
569 |
if (nce == NULL) |
|
570 |
nce = nce_lookup_mapping(ill, addr); |
|
571 |
if (!caller_holds_lock) |
|
572 |
mutex_exit(&ndp_g_lock); |
|
573 |
return (nce); |
|
574 |
} |
|
575 |
||
576 |
/* |
|
577 |
* Cache entry lookup. Try to find an nce matching the parameters passed. |
|
578 |
* Look only for exact entries (no mappings). If an nce is found, increment |
|
579 |
* the hold count on that nce. |
|
580 |
*/ |
|
581 |
static nce_t * |
|
582 |
nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) |
|
583 |
{ |
|
584 |
nce_t *nce; |
|
585 |
||
586 |
ASSERT(ill != NULL); |
|
587 |
ASSERT(MUTEX_HELD(&ndp_g_lock)); |
|
588 |
if (IN6_IS_ADDR_UNSPECIFIED(addr)) |
|
589 |
return (NULL); |
|
590 |
nce = *((nce_t **)NCE_HASH_PTR(*addr)); |
|
591 |
for (; nce != NULL; nce = nce->nce_next) { |
|
592 |
if (nce->nce_ill == ill) { |
|
593 |
if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && |
|
594 |
IN6_ARE_ADDR_EQUAL(&nce->nce_mask, |
|
595 |
&ipv6_all_ones)) { |
|
596 |
mutex_enter(&nce->nce_lock); |
|
597 |
if (!(nce->nce_flags & NCE_F_CONDEMNED)) { |
|
598 |
NCE_REFHOLD_LOCKED(nce); |
|
599 |
mutex_exit(&nce->nce_lock); |
|
600 |
break; |
|
601 |
} |
|
602 |
mutex_exit(&nce->nce_lock); |
|
603 |
} |
|
604 |
} |
|
605 |
} |
|
606 |
return (nce); |
|
607 |
} |
|
608 |
||
609 |
/* |
|
610 |
* Cache entry lookup. Try to find an nce matching the parameters passed. |
|
611 |
* Look only for mappings. |
|
612 |
*/ |
|
613 |
static nce_t * |
|
614 |
nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) |
|
615 |
{ |
|
616 |
nce_t *nce; |
|
617 |
||
618 |
ASSERT(ill != NULL); |
|
619 |
ASSERT(MUTEX_HELD(&ndp_g_lock)); |
|
620 |
if (!IN6_IS_ADDR_MULTICAST(addr)) |
|
621 |
return (NULL); |
|
622 |
nce = nce_mask_entries; |
|
623 |
for (; nce != NULL; nce = nce->nce_next) |
|
624 |
if (nce->nce_ill == ill && |
|
625 |
(V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { |
|
626 |
mutex_enter(&nce->nce_lock); |
|
627 |
if (!(nce->nce_flags & NCE_F_CONDEMNED)) { |
|
628 |
NCE_REFHOLD_LOCKED(nce); |
|
629 |
mutex_exit(&nce->nce_lock); |
|
630 |
break; |
|
631 |
} |
|
632 |
mutex_exit(&nce->nce_lock); |
|
633 |
} |
|
634 |
return (nce); |
|
635 |
} |
|
636 |
||
637 |
/* |
|
638 |
* Process passed in parameters either from an incoming packet or via |
|
639 |
* user ioctl. |
|
640 |
*/ |
|
641 |
void |
|
642 |
ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) |
|
643 |
{ |
|
644 |
ill_t *ill = nce->nce_ill; |
|
645 |
uint32_t hw_addr_len = ill->ill_nd_lla_len; |
|
646 |
mblk_t *mp; |
|
647 |
boolean_t ll_updated = B_FALSE; |
|
648 |
boolean_t ll_changed; |
|
649 |
||
650 |
/* |
|
651 |
* No updates of link layer address or the neighbor state is |
|
652 |
* allowed, when the cache is in NONUD state. This still |
|
653 |
* allows for responding to reachability solicitation. |
|
654 |
*/ |
|
655 |
mutex_enter(&nce->nce_lock); |
|
656 |
if (nce->nce_state == ND_INCOMPLETE) { |
|
657 |
if (hw_addr == NULL) { |
|
658 |
mutex_exit(&nce->nce_lock); |
|
659 |
return; |
|
660 |
} |
|
661 |
nce_set_ll(nce, hw_addr); |
|
662 |
/* |
|
663 |
* Update nce state and send the queued packets |
|
664 |
* back to ip this time ire will be added. |
|
665 |
*/ |
|
666 |
if (flag & ND_NA_FLAG_SOLICITED) { |
|
667 |
nce_update(nce, ND_REACHABLE, NULL); |
|
668 |
} else { |
|
669 |
nce_update(nce, ND_STALE, NULL); |
|
670 |
} |
|
671 |
mutex_exit(&nce->nce_lock); |
|
672 |
nce_fastpath(nce); |
|
673 |
mutex_enter(&nce->nce_lock); |
|
674 |
mp = nce->nce_qd_mp; |
|
675 |
nce->nce_qd_mp = NULL; |
|
676 |
mutex_exit(&nce->nce_lock); |
|
677 |
while (mp != NULL) { |
|
678 |
mblk_t *nxt_mp; |
|
679 |
||
680 |
nxt_mp = mp->b_next; |
|
681 |
mp->b_next = NULL; |
|
682 |
if (mp->b_prev != NULL) { |
|
683 |
ill_t *inbound_ill; |
|
684 |
queue_t *fwdq = NULL; |
|
685 |
uint_t ifindex; |
|
686 |
||
687 |
ifindex = (uint_t)(uintptr_t)mp->b_prev; |
|
688 |
inbound_ill = ill_lookup_on_ifindex(ifindex, |
|
689 |
B_TRUE, NULL, NULL, NULL, NULL); |
|
690 |
if (inbound_ill == NULL) { |
|
691 |
mp->b_prev = NULL; |
|
692 |
freemsg(mp); |
|
693 |
return; |
|
694 |
} else { |
|
695 |
fwdq = inbound_ill->ill_rq; |
|
696 |
} |
|
697 |
mp->b_prev = NULL; |
|
698 |
/* |
|
699 |
* Send a forwarded packet back into ip_rput_v6 |
|
700 |
* just as in ire_send_v6(). |
|
701 |
* Extract the queue from b_prev (set in |
|
702 |
* ip_rput_data_v6). |
|
703 |
*/ |
|
704 |
if (fwdq != NULL) { |
|
705 |
/* |
|
706 |
* Forwarded packets hop count will |
|
707 |
* get decremented in ip_rput_data_v6 |
|
708 |
*/ |
|
709 |
put(fwdq, mp); |
|
710 |
} else { |
|
711 |
/* |
|
712 |
* Send locally originated packets back |
|
713 |
* into * ip_wput_v6. |
|
714 |
*/ |
|
715 |
put(ill->ill_wq, mp); |
|
716 |
} |
|
717 |
ill_refrele(inbound_ill); |
|
718 |
} else { |
|
719 |
put(ill->ill_wq, mp); |
|
720 |
} |
|
721 |
mp = nxt_mp; |
|
722 |
} |
|
723 |
return; |
|
724 |
} |
|
725 |
ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); |
|
726 |
if (!is_adv) { |
|
727 |
/* If this is a SOLICITATION request only */ |
|
728 |
if (ll_changed) |
|
729 |
nce_update(nce, ND_STALE, hw_addr); |
|
730 |
mutex_exit(&nce->nce_lock); |
|
731 |
return; |
|
732 |
} |
|
733 |
if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { |
|
734 |
/* If in any other state than REACHABLE, ignore */ |
|
735 |
if (nce->nce_state == ND_REACHABLE) { |
|
736 |
nce_update(nce, ND_STALE, NULL); |
|
737 |
} |
|
738 |
mutex_exit(&nce->nce_lock); |
|
739 |
return; |
|
740 |
} else { |
|
741 |
if (ll_changed) { |
|
742 |
nce_update(nce, ND_UNCHANGED, hw_addr); |
|
743 |
ll_updated = B_TRUE; |
|
744 |
} |
|
745 |
if (flag & ND_NA_FLAG_SOLICITED) { |
|
746 |
nce_update(nce, ND_REACHABLE, NULL); |
|
747 |
} else { |
|
748 |
if (ll_updated) { |
|
749 |
nce_update(nce, ND_STALE, NULL); |
|
750 |
} |
|
751 |
} |
|
752 |
mutex_exit(&nce->nce_lock); |
|
753 |
if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & |
|
754 |
NCE_F_ISROUTER)) { |
|
755 |
ire_t *ire; |
|
756 |
||
757 |
/* |
|
758 |
* Router turned to host. We need to remove the |
|
759 |
* entry as well as any default route that may be |
|
760 |
* using this as a next hop. This is required by |
|
761 |
* section 7.2.5 of RFC 2461. |
|
762 |
*/ |
|
763 |
ire = ire_ftable_lookup_v6(&ipv6_all_zeros, |
|
764 |
&ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, |
|
765 |
nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, |
|
766 |
MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | |
|
767 |
MATCH_IRE_DEFAULT); |
|
768 |
if (ire != NULL) { |
|
769 |
ip_rts_rtmsg(RTM_DELETE, ire, 0); |
|
770 |
ire_delete(ire); |
|
771 |
ire_refrele(ire); |
|
772 |
} |
|
773 |
ndp_delete(nce); |
|
774 |
} |
|
775 |
} |
|
776 |
} |
|
777 |
||
778 |
/* |
|
779 |
* Pass arg1 to the pfi supplied, along with each nce in existence. |
|
780 |
* ndp_walk() places a REFHOLD on the nce and drops the lock when |
|
781 |
* walking the hash list. |
|
782 |
*/ |
|
783 |
void |
|
784 |
ndp_walk_impl(ill_t *ill, pfi_t pfi, uchar_t *arg1, boolean_t trace) |
|
785 |
{ |
|
786 |
||
787 |
nce_t *nce; |
|
788 |
nce_t *nce1; |
|
789 |
nce_t **ncep; |
|
790 |
nce_t *free_nce_list = NULL; |
|
791 |
||
792 |
mutex_enter(&ndp_g_lock); |
|
793 |
ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */ |
|
794 |
mutex_exit(&ndp_g_lock); |
|
795 |
for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { |
|
796 |
for (nce = *ncep; nce; nce = nce1) { |
|
797 |
nce1 = nce->nce_next; |
|
798 |
if (ill == NULL || nce->nce_ill == ill) { |
|
799 |
if (trace) { |
|
800 |
NCE_REFHOLD(nce); |
|
801 |
(*pfi)(nce, arg1); |
|
802 |
NCE_REFRELE(nce); |
|
803 |
} else { |
|
804 |
NCE_REFHOLD_NOTR(nce); |
|
805 |
(*pfi)(nce, arg1); |
|
806 |
NCE_REFRELE_NOTR(nce); |
|
807 |
} |
|
808 |
} |
|
809 |
} |
|
810 |
} |
|
811 |
for (nce = nce_mask_entries; nce; nce = nce1) { |
|
812 |
nce1 = nce->nce_next; |
|
813 |
if (ill == NULL || nce->nce_ill == ill) { |
|
814 |
if (trace) { |
|
815 |
NCE_REFHOLD(nce); |
|
816 |
(*pfi)(nce, arg1); |
|
817 |
NCE_REFRELE(nce); |
|
818 |
} else { |
|
819 |
NCE_REFHOLD_NOTR(nce); |
|
820 |
(*pfi)(nce, arg1); |
|
821 |
NCE_REFRELE_NOTR(nce); |
|
822 |
} |
|
823 |
} |
|
824 |
} |
|
825 |
mutex_enter(&ndp_g_lock); |
|
826 |
ndp_g_walker--; |
|
827 |
/* |
|
828 |
* While NCE's are removed from global list they are placed |
|
829 |
* in a private list, to be passed to nce_ire_delete_list(). |
|
830 |
* The reason is, there may be ires pointing to this nce |
|
831 |
* which needs to cleaned up. |
|
832 |
*/ |
|
833 |
if (ndp_g_walker_cleanup && ndp_g_walker == 0) { |
|
834 |
/* Time to delete condemned entries */ |
|
835 |
for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { |
|
836 |
nce = *ncep; |
|
837 |
if (nce != NULL) { |
|
838 |
nce_remove(nce, &free_nce_list); |
|
839 |
} |
|
840 |
} |
|
841 |
nce = nce_mask_entries; |
|
842 |
if (nce != NULL) { |
|
843 |
nce_remove(nce, &free_nce_list); |
|
844 |
} |
|
845 |
ndp_g_walker_cleanup = B_FALSE; |
|
846 |
} |
|
847 |
mutex_exit(&ndp_g_lock); |
|
848 |
||
849 |
if (free_nce_list != NULL) { |
|
850 |
nce_ire_delete_list(free_nce_list); |
|
851 |
} |
|
852 |
} |
|
853 |
||
854 |
void |
|
855 |
ndp_walk(ill_t *ill, pfi_t pfi, uchar_t *arg1) |
|
856 |
{ |
|
857 |
ndp_walk_impl(ill, pfi, arg1, B_TRUE); |
|
858 |
} |
|
859 |
||
860 |
/* |
|
861 |
* Prepend the zoneid using an ipsec_out_t for later use by functions like |
|
862 |
* ip_rput_v6() after neighbor discovery has taken place. If the message |
|
863 |
* block already has a M_CTL at the front of it, then simply set the zoneid |
|
864 |
* appropriately. |
|
865 |
*/ |
|
866 |
static mblk_t * |
|
867 |
ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) |
|
868 |
{ |
|
869 |
mblk_t *first_mp; |
|
870 |
ipsec_out_t *io; |
|
871 |
||
872 |
if (mp->b_datap->db_type == M_CTL) { |
|
873 |
io = (ipsec_out_t *)mp->b_rptr; |
|
874 |
ASSERT(io->ipsec_out_type == IPSEC_OUT); |
|
875 |
io->ipsec_out_zoneid = zoneid; |
|
876 |
return (mp); |
|
877 |
} |
|
878 |
||
879 |
first_mp = ipsec_alloc_ipsec_out(); |
|
880 |
if (first_mp == NULL) |
|
881 |
return (NULL); |
|
882 |
io = (ipsec_out_t *)first_mp->b_rptr; |
|
883 |
/* This is not a secure packet */ |
|
884 |
io->ipsec_out_secure = B_FALSE; |
|
885 |
io->ipsec_out_zoneid = zoneid; |
|
886 |
first_mp->b_cont = mp; |
|
887 |
return (first_mp); |
|
888 |
} |
|
889 |
||
890 |
/* |
|
891 |
* Process resolve requests. Handles both mapped entries |
|
892 |
* as well as cases that needs to be send out on the wire. |
|
893 |
* Lookup a NCE for a given IRE. Regardless of whether one exists |
|
894 |
* or one is created, we defer making ire point to nce until the |
|
895 |
* ire is actually added at which point the nce_refcnt on the nce is |
|
896 |
* incremented. This is done primarily to have symmetry between ire_add() |
|
897 |
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted. |
|
898 |
*/ |
|
899 |
int |
|
900 |
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) |
|
901 |
{ |
|
902 |
nce_t *nce; |
|
903 |
int err = 0; |
|
904 |
uint32_t ms; |
|
905 |
mblk_t *mp_nce = NULL; |
|
906 |
||
907 |
ASSERT(ill != NULL); |
|
908 |
if (IN6_IS_ADDR_MULTICAST(dst)) { |
|
909 |
err = nce_set_multicast(ill, dst); |
|
910 |
return (err); |
|
911 |
} |
|
912 |
err = ndp_lookup_then_add(ill, |
|
913 |
NULL, /* No hardware address */ |
|
914 |
dst, |
|
915 |
&ipv6_all_ones, |
|
916 |
&ipv6_all_zeros, |
|
917 |
0, |
|
918 |
(ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, |
|
919 |
ND_INCOMPLETE, |
|
920 |
&nce); |
|
921 |
||
922 |
switch (err) { |
|
923 |
case 0: |
|
924 |
/* |
|
925 |
* New cache entry was created. Make sure that the state |
|
926 |
* is not ND_INCOMPLETE. It can be in some other state |
|
927 |
* even before we send out the solicitation as we could |
|
928 |
* get un-solicited advertisements. |
|
929 |
* |
|
930 |
* If this is an XRESOLV interface, simply return 0, |
|
931 |
* since we don't want to solicit just yet. |
|
932 |
*/ |
|
933 |
if (ill->ill_flags & ILLF_XRESOLV) { |
|
934 |
NCE_REFRELE(nce); |
|
935 |
return (0); |
|
936 |
} |
|
937 |
rw_enter(&ill_g_lock, RW_READER); |
|
938 |
mutex_enter(&nce->nce_lock); |
|
939 |
if (nce->nce_state != ND_INCOMPLETE) { |
|
940 |
mutex_exit(&nce->nce_lock); |
|
941 |
rw_exit(&ill_g_lock); |
|
942 |
NCE_REFRELE(nce); |
|
943 |
return (0); |
|
944 |
} |
|
945 |
mp_nce = ndp_prepend_zone(mp, zoneid); |
|
946 |
if (mp_nce == NULL) { |
|
947 |
/* The caller will free mp */ |
|
948 |
mutex_exit(&nce->nce_lock); |
|
949 |
rw_exit(&ill_g_lock); |
|
950 |
ndp_delete(nce); |
|
951 |
NCE_REFRELE(nce); |
|
952 |
return (ENOMEM); |
|
953 |
} |
|
954 |
ms = nce_solicit(nce, mp_nce); |
|
955 |
rw_exit(&ill_g_lock); |
|
956 |
if (ms == 0) { |
|
957 |
/* The caller will free mp */ |
|
958 |
if (mp_nce != mp) |
|
959 |
freeb(mp_nce); |
|
960 |
mutex_exit(&nce->nce_lock); |
|
961 |
ndp_delete(nce); |
|
962 |
NCE_REFRELE(nce); |
|
963 |
return (EBUSY); |
|
964 |
} |
|
965 |
mutex_exit(&nce->nce_lock); |
|
966 |
NDP_RESTART_TIMER(nce, (clock_t)ms); |
|
967 |
NCE_REFRELE(nce); |
|
968 |
return (EINPROGRESS); |
|
969 |
case EEXIST: |
|
970 |
/* Resolution in progress just queue the packet */ |
|
971 |
mutex_enter(&nce->nce_lock); |
|
972 |
if (nce->nce_state == ND_INCOMPLETE) { |
|
973 |
mp_nce = ndp_prepend_zone(mp, zoneid); |
|
974 |
if (mp_nce == NULL) { |
|
975 |
err = ENOMEM; |
|
976 |
} else { |
|
977 |
nce_queue_mp(nce, mp_nce); |
|
978 |
err = EINPROGRESS; |
|
979 |
} |
|
980 |
} else { |
|
981 |
/* |
|
982 |
* Any other state implies we have |
|
983 |
* a nce but IRE needs to be added ... |
|
984 |
* ire_add_v6() will take care of the |
|
985 |
* the case when the nce becomes CONDEMNED |
|
986 |
* before the ire is added to the table. |
|
987 |
*/ |
|
988 |
err = 0; |
|
989 |
} |
|
990 |
mutex_exit(&nce->nce_lock); |
|
991 |
NCE_REFRELE(nce); |
|
992 |
break; |
|
993 |
default: |
|
994 |
ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); |
|
995 |
break; |
|
996 |
} |
|
997 |
return (err); |
|
998 |
} |
|
999 |
||
1000 |
/* |
|
1001 |
* When there is no resolver, the link layer template is passed in |
|
1002 |
* the IRE. |
|
1003 |
* Lookup a NCE for a given IRE. Regardless of whether one exists |
|
1004 |
* or one is created, we defer making ire point to nce until the |
|
1005 |
* ire is actually added at which point the nce_refcnt on the nce is |
|
1006 |
* incremented. This is done primarily to have symmetry between ire_add() |
|
1007 |
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted. |
|
1008 |
*/ |
|
1009 |
int |
|
1010 |
ndp_noresolver(ill_t *ill, const in6_addr_t *dst) |
|
1011 |
{ |
|
1012 |
nce_t *nce; |
|
1013 |
int err = 0; |
|
1014 |
||
1015 |
ASSERT(ill != NULL); |
|
1016 |
if (IN6_IS_ADDR_MULTICAST(dst)) { |
|
1017 |
err = nce_set_multicast(ill, dst); |
|
1018 |
return (err); |
|
1019 |
} |
|
1020 |
||
1021 |
err = ndp_lookup_then_add(ill, |
|
1022 |
NULL, /* hardware address */ |
|
1023 |
dst, |
|
1024 |
&ipv6_all_ones, |
|
1025 |
&ipv6_all_zeros, |
|
1026 |
0, |
|
1027 |
(ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, |
|
1028 |
ND_REACHABLE, |
|
1029 |
&nce); |
|
1030 |
||
1031 |
switch (err) { |
|
1032 |
case 0: |
|
1033 |
/* |
|
1034 |
* Cache entry with a proper resolver cookie was |
|
1035 |
* created. |
|
1036 |
*/ |
|
1037 |
NCE_REFRELE(nce); |
|
1038 |
break; |
|
1039 |
case EEXIST: |
|
1040 |
err = 0; |
|
1041 |
NCE_REFRELE(nce); |
|
1042 |
break; |
|
1043 |
default: |
|
1044 |
ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); |
|
1045 |
break; |
|
1046 |
} |
|
1047 |
return (err); |
|
1048 |
} |
|
1049 |
||
1050 |
/* |
|
1051 |
* For each interface an entry is added for the unspecified multicast group. |
|
1052 |
* Here that mapping is used to form the multicast cache entry for a particular |
|
1053 |
* multicast destination. |
|
1054 |
*/ |
|
1055 |
static int |
|
1056 |
nce_set_multicast(ill_t *ill, const in6_addr_t *dst) |
|
1057 |
{ |
|
1058 |
nce_t *mnce; /* Multicast mapping entry */ |
|
1059 |
nce_t *nce; |
|
1060 |
uchar_t *hw_addr = NULL; |
|
1061 |
int err = 0; |
|
1062 |
||
1063 |
ASSERT(ill != NULL); |
|
1064 |
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); |
|
1065 |
||
1066 |
mutex_enter(&ndp_g_lock); |
|
1067 |
nce = nce_lookup_addr(ill, dst); |
|
1068 |
if (nce != NULL) { |
|
1069 |
mutex_exit(&ndp_g_lock); |
|
1070 |
NCE_REFRELE(nce); |
|
1071 |
return (0); |
|
1072 |
} |
|
1073 |
/* No entry, now lookup for a mapping this should never fail */ |
|
1074 |
mnce = nce_lookup_mapping(ill, dst); |
|
1075 |
if (mnce == NULL) { |
|
1076 |
/* Something broken for the interface. */ |
|
1077 |
mutex_exit(&ndp_g_lock); |
|
1078 |
return (ESRCH); |
|
1079 |
} |
|
1080 |
ASSERT(mnce->nce_flags & NCE_F_MAPPING); |
|
1081 |
if (ill->ill_net_type == IRE_IF_RESOLVER) { |
|
1082 |
/* |
|
1083 |
* For IRE_IF_RESOLVER a hardware mapping can be |
|
1084 |
* generated, for IRE_IF_NORESOLVER, resolution cookie |
|
1085 |
* in the ill is copied in ndp_add(). |
|
1086 |
*/ |
|
1087 |
hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); |
|
1088 |
if (hw_addr == NULL) { |
|
1089 |
mutex_exit(&ndp_g_lock); |
|
1090 |
NCE_REFRELE(mnce); |
|
1091 |
return (ENOMEM); |
|
1092 |
} |
|
1093 |
nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); |
|
1094 |
} |
|
1095 |
NCE_REFRELE(mnce); |
|
1096 |
/* |
|
1097 |
* IRE_IF_NORESOLVER type simply copies the resolution |
|
1098 |
* cookie passed in. So no hw_addr is needed. |
|
1099 |
*/ |
|
1100 |
err = ndp_add(ill, |
|
1101 |
hw_addr, |
|
1102 |
dst, |
|
1103 |
&ipv6_all_ones, |
|
1104 |
&ipv6_all_zeros, |
|
1105 |
0, |
|
1106 |
NCE_F_NONUD, |
|
1107 |
ND_REACHABLE, |
|
1108 |
&nce); |
|
1109 |
mutex_exit(&ndp_g_lock); |
|
1110 |
if (hw_addr != NULL) |
|
1111 |
kmem_free(hw_addr, ill->ill_nd_lla_len); |
|
1112 |
if (err != 0) { |
|
1113 |
ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); |
|
1114 |
return (err); |
|
1115 |
} |
|
1116 |
NCE_REFRELE(nce); |
|
1117 |
return (0); |
|
1118 |
} |
|
1119 |
||
1120 |
/* |
|
1121 |
* Return the link layer address, and any flags of a nce. |
|
1122 |
*/ |
|
1123 |
int |
|
1124 |
ndp_query(ill_t *ill, struct lif_nd_req *lnr) |
|
1125 |
{ |
|
1126 |
nce_t *nce; |
|
1127 |
in6_addr_t *addr; |
|
1128 |
sin6_t *sin6; |
|
1129 |
dl_unitdata_req_t *dl; |
|
1130 |
||
1131 |
ASSERT(ill != NULL); |
|
1132 |
sin6 = (sin6_t *)&lnr->lnr_addr; |
|
1133 |
addr = &sin6->sin6_addr; |
|
1134 |
||
1135 |
nce = ndp_lookup(ill, addr, B_FALSE); |
|
1136 |
if (nce == NULL) |
|
1137 |
return (ESRCH); |
|
1138 |
/* If in INCOMPLETE state, no link layer address is available yet */ |
|
1139 |
if (nce->nce_state == ND_INCOMPLETE) |
|
1140 |
goto done; |
|
1141 |
dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; |
|
1142 |
if (ill->ill_flags & ILLF_XRESOLV) |
|
1143 |
lnr->lnr_hdw_len = dl->dl_dest_addr_length; |
|
1144 |
else |
|
1145 |
lnr->lnr_hdw_len = ill->ill_nd_lla_len; |
|
1146 |
ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= |
|
1147 |
sizeof (lnr->lnr_hdw_addr)); |
|
1148 |
bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), |
|
1149 |
(uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); |
|
1150 |
if (nce->nce_flags & NCE_F_ISROUTER) |
|
1151 |
lnr->lnr_flags = NDF_ISROUTER_ON; |
|
1152 |
if (nce->nce_flags & NCE_F_PROXY) |
|
1153 |
lnr->lnr_flags |= NDF_PROXY_ON; |
|
1154 |
if (nce->nce_flags & NCE_F_ANYCAST) |
|
1155 |
lnr->lnr_flags |= NDF_ANYCAST_ON; |
|
1156 |
done: |
|
1157 |
NCE_REFRELE(nce); |
|
1158 |
return (0); |
|
1159 |
} |
|
1160 |
||
1161 |
/* |
|
1162 |
* Send Enable/Disable multicast reqs to driver. |
|
1163 |
*/ |
|
1164 |
int |
|
1165 |
ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, |
|
1166 |
uint32_t hw_addr_offset, mblk_t *mp) |
|
1167 |
{ |
|
1168 |
nce_t *nce; |
|
1169 |
uchar_t *hw_addr; |
|
1170 |
||
1171 |
ASSERT(ill != NULL); |
|
1172 |
ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); |
|
1173 |
hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); |
|
1174 |
if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { |
|
1175 |
freemsg(mp); |
|
1176 |
return (EINVAL); |
|
1177 |
} |
|
1178 |
mutex_enter(&ndp_g_lock); |
|
1179 |
nce = nce_lookup_mapping(ill, addr); |
|
1180 |
if (nce == NULL) { |
|
1181 |
mutex_exit(&ndp_g_lock); |
|
1182 |
freemsg(mp); |
|
1183 |
return (ESRCH); |
|
1184 |
} |
|
1185 |
mutex_exit(&ndp_g_lock); |
|
1186 |
/* |
|
1187 |
* Update dl_addr_length and dl_addr_offset for primitives that |
|
1188 |
* have physical addresses as opposed to full saps |
|
1189 |
*/ |
|
1190 |
switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { |
|
1191 |
case DL_ENABMULTI_REQ: |
|
1192 |
/* Track the state if this is the first enabmulti */ |
|
1193 |
if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) |
|
1194 |
ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; |
|
1195 |
ip1dbg(("ndp_mcastreq: ENABMULTI\n")); |
|
1196 |
break; |
|
1197 |
case DL_DISABMULTI_REQ: |
|
1198 |
ip1dbg(("ndp_mcastreq: DISABMULTI\n")); |
|
1199 |
break; |
|
1200 |
default: |
|
1201 |
NCE_REFRELE(nce); |
|
1202 |
ip1dbg(("ndp_mcastreq: default\n")); |
|
1203 |
return (EINVAL); |
|
1204 |
} |
|
1205 |
nce_make_mapping(nce, hw_addr, (uchar_t *)addr); |
|
1206 |
NCE_REFRELE(nce); |
|
1207 |
putnext(ill->ill_wq, mp); |
|
1208 |
return (0); |
|
1209 |
} |
|
1210 |
||
1211 |
/* |
|
1212 |
* Send a neighbor solicitation. |
|
1213 |
* Returns number of milliseconds after which we should either rexmit or abort. |
|
1214 |
* Return of zero means we should abort. |
|
1215 |
* The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. |
|
1216 |
* |
|
1217 |
* NOTE: This routine drops nce_lock (and later reacquires it) when sending |
|
1218 |
* the packet. |
|
1219 |
* NOTE: This routine does not consume mp. |
|
1220 |
*/ |
|
1221 |
uint32_t |
|
1222 |
nce_solicit(nce_t *nce, mblk_t *mp) |
|
1223 |
{ |
|
1224 |
ill_t *ill; |
|
1225 |
ill_t *src_ill; |
|
1226 |
ip6_t *ip6h; |
|
1227 |
in6_addr_t src; |
|
1228 |
in6_addr_t dst; |
|
1229 |
ipif_t *ipif; |
|
1230 |
ip6i_t *ip6i; |
|
1231 |
boolean_t dropped = B_FALSE; |
|
1232 |
||
1233 |
ASSERT(RW_READ_HELD(&ill_g_lock)); |
|
1234 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
1235 |
ill = nce->nce_ill; |
|
1236 |
ASSERT(ill != NULL); |
|
1237 |
||
1238 |
if (nce->nce_rcnt == 0) { |
|
1239 |
return (0); |
|
1240 |
} |
|
1241 |
||
1242 |
if (mp == NULL) { |
|
1243 |
ASSERT(nce->nce_qd_mp != NULL); |
|
1244 |
mp = nce->nce_qd_mp; |
|
1245 |
} else { |
|
1246 |
nce_queue_mp(nce, mp); |
|
1247 |
} |
|
1248 |
||
1249 |
/* Handle ip_newroute_v6 giving us IPSEC packets */ |
|
1250 |
if (mp->b_datap->db_type == M_CTL) |
|
1251 |
mp = mp->b_cont; |
|
1252 |
||
1253 |
ip6h = (ip6_t *)mp->b_rptr; |
|
1254 |
if (ip6h->ip6_nxt == IPPROTO_RAW) { |
|
1255 |
/* |
|
1256 |
* This message should have been pulled up already in |
|
1257 |
* ip_wput_v6. We can't do pullups here because the message |
|
1258 |
* could be from the nce_qd_mp which could have b_next/b_prev |
|
1259 |
* non-NULL. |
|
1260 |
*/ |
|
1261 |
ip6i = (ip6i_t *)ip6h; |
|
1262 |
ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= |
|
1263 |
sizeof (ip6i_t) + IPV6_HDR_LEN); |
|
1264 |
ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); |
|
1265 |
} |
|
1266 |
src = ip6h->ip6_src; |
|
1267 |
/* |
|
1268 |
* If the src of outgoing packet is one of the assigned interface |
|
1269 |
* addresses use it, otherwise we will pick the source address below. |
|
1270 |
*/ |
|
1271 |
src_ill = ill; |
|
1272 |
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { |
|
1273 |
if (ill->ill_group != NULL) |
|
1274 |
src_ill = ill->ill_group->illgrp_ill; |
|
1275 |
for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { |
|
1276 |
for (ipif = src_ill->ill_ipif; ipif != NULL; |
|
1277 |
ipif = ipif->ipif_next) { |
|
1278 |
if (IN6_ARE_ADDR_EQUAL(&src, |
|
1279 |
&ipif->ipif_v6lcl_addr)) { |
|
1280 |
break; |
|
1281 |
} |
|
1282 |
} |
|
1283 |
if (ipif != NULL) |
|
1284 |
break; |
|
1285 |
} |
|
1286 |
if (src_ill == NULL) { |
|
1287 |
/* May be a forwarding packet */ |
|
1288 |
src_ill = ill; |
|
1289 |
src = ipv6_all_zeros; |
|
1290 |
} |
|
1291 |
} |
|
1292 |
dst = nce->nce_addr; |
|
1293 |
/* |
|
1294 |
* If source address is unspecified, nce_xmit will choose |
|
1295 |
* one for us and initialize the hardware address also |
|
1296 |
* appropriately. |
|
1297 |
*/ |
|
1298 |
if (IN6_IS_ADDR_UNSPECIFIED(&src)) |
|
1299 |
src_ill = NULL; |
|
1300 |
nce->nce_rcnt--; |
|
1301 |
mutex_exit(&nce->nce_lock); |
|
1302 |
rw_exit(&ill_g_lock); |
|
1303 |
dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, |
|
1304 |
&dst, 0); |
|
1305 |
rw_enter(&ill_g_lock, RW_READER); |
|
1306 |
mutex_enter(&nce->nce_lock); |
|
1307 |
if (dropped) |
|
1308 |
nce->nce_rcnt++; |
|
1309 |
return (ill->ill_reachable_retrans_time); |
|
1310 |
} |
|
1311 |
||
1312 |
void |
|
1313 |
ndp_input_solicit(ill_t *ill, mblk_t *mp) |
|
1314 |
{ |
|
1315 |
nd_neighbor_solicit_t *ns; |
|
1316 |
uint32_t hlen = ill->ill_nd_lla_len; |
|
1317 |
uchar_t *haddr = NULL; |
|
1318 |
icmp6_t *icmp_nd; |
|
1319 |
ip6_t *ip6h; |
|
1320 |
nce_t *our_nce = NULL; |
|
1321 |
in6_addr_t target; |
|
1322 |
in6_addr_t src; |
|
1323 |
int len; |
|
1324 |
int flag = 0; |
|
1325 |
nd_opt_hdr_t *opt = NULL; |
|
1326 |
boolean_t bad_solicit = B_FALSE; |
|
1327 |
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
|
1328 |
||
1329 |
ip6h = (ip6_t *)mp->b_rptr; |
|
1330 |
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
|
1331 |
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
|
1332 |
src = ip6h->ip6_src; |
|
1333 |
ns = (nd_neighbor_solicit_t *)icmp_nd; |
|
1334 |
target = ns->nd_ns_target; |
|
1335 |
if (IN6_IS_ADDR_MULTICAST(&target)) { |
|
1336 |
if (ip_debug > 2) { |
|
1337 |
/* ip1dbg */ |
|
1338 |
pr_addr_dbg("ndp_input_solicit: Target is" |
|
1339 |
" multicast! %s\n", AF_INET6, &target); |
|
1340 |
} |
|
1341 |
bad_solicit = B_TRUE; |
|
1342 |
goto done; |
|
1343 |
} |
|
1344 |
if (len > sizeof (nd_neighbor_solicit_t)) { |
|
1345 |
/* Options present */ |
|
1346 |
opt = (nd_opt_hdr_t *)&ns[1]; |
|
1347 |
len -= sizeof (nd_neighbor_solicit_t); |
|
1348 |
if (!ndp_verify_optlen(opt, len)) { |
|
1349 |
ip1dbg(("ndp_input_solicit: Bad opt len\n")); |
|
1350 |
bad_solicit = B_TRUE; |
|
1351 |
goto done; |
|
1352 |
} |
|
1353 |
} |
|
1354 |
if (IN6_IS_ADDR_UNSPECIFIED(&src)) { |
|
1355 |
/* Check to see if this is a valid DAD solicitation */ |
|
1356 |
if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { |
|
1357 |
if (ip_debug > 2) { |
|
1358 |
/* ip1dbg */ |
|
1359 |
pr_addr_dbg("ndp_input_solicit: IPv6 " |
|
1360 |
"Destination is not solicited node " |
|
1361 |
"multicast %s\n", AF_INET6, |
|
1362 |
&ip6h->ip6_dst); |
|
1363 |
} |
|
1364 |
bad_solicit = B_TRUE; |
|
1365 |
goto done; |
|
1366 |
} |
|
1367 |
} |
|
1368 |
||
1369 |
our_nce = ndp_lookup(ill, &target, B_FALSE); |
|
1370 |
/* |
|
1371 |
* If this is a valid Solicitation, a permanent |
|
1372 |
* entry should exist in the cache |
|
1373 |
*/ |
|
1374 |
if (our_nce == NULL || |
|
1375 |
!(our_nce->nce_flags & NCE_F_PERMANENT)) { |
|
1376 |
ip1dbg(("ndp_input_solicit: Wrong target in NS?!" |
|
1377 |
"ifname=%s ", ill->ill_name)); |
|
1378 |
if (ip_debug > 2) { |
|
1379 |
/* ip1dbg */ |
|
1380 |
pr_addr_dbg(" dst %s\n", AF_INET6, &target); |
|
1381 |
} |
|
1382 |
bad_solicit = B_TRUE; |
|
1383 |
goto done; |
|
1384 |
} |
|
1385 |
||
1386 |
/* At this point we should have a verified NS per spec */ |
|
1387 |
if (opt != NULL) { |
|
1388 |
opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); |
|
1389 |
if (opt != NULL) { |
|
1390 |
/* |
|
1391 |
* No source link layer address option should |
|
1392 |
* be present in a valid DAD request. |
|
1393 |
*/ |
|
1394 |
if (IN6_IS_ADDR_UNSPECIFIED(&src)) { |
|
1395 |
ip1dbg(("ndp_input_solicit: source link-layer " |
|
1396 |
"address option present with an " |
|
1397 |
"unspecified source. \n")); |
|
1398 |
bad_solicit = B_TRUE; |
|
1399 |
goto done; |
|
1400 |
} |
|
1401 |
haddr = (uchar_t *)&opt[1]; |
|
1402 |
if (hlen > opt->nd_opt_len * 8 || |
|
1403 |
hlen == 0) { |
|
1404 |
bad_solicit = B_TRUE; |
|
1405 |
goto done; |
|
1406 |
} |
|
1407 |
} |
|
1408 |
} |
|
1409 |
/* Set override flag, it will be reset later if need be. */ |
|
1410 |
flag |= NDP_ORIDE; |
|
1411 |
if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { |
|
1412 |
flag |= NDP_UNICAST; |
|
1413 |
} |
|
1414 |
||
1415 |
/* |
|
1416 |
* Create/update the entry for the soliciting node. |
|
1417 |
* or respond to outstanding queries, don't if |
|
1418 |
* the source is unspecified address. |
|
1419 |
*/ |
|
1420 |
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { |
|
1421 |
int err = 0; |
|
1422 |
nce_t *nnce; |
|
1423 |
||
1424 |
err = ndp_lookup_then_add(ill, |
|
1425 |
haddr, |
|
1426 |
&src, /* Soliciting nodes address */ |
|
1427 |
&ipv6_all_ones, |
|
1428 |
&ipv6_all_zeros, |
|
1429 |
0, |
|
1430 |
0, |
|
1431 |
ND_STALE, |
|
1432 |
&nnce); |
|
1433 |
switch (err) { |
|
1434 |
case 0: |
|
1435 |
/* done with this entry */ |
|
1436 |
NCE_REFRELE(nnce); |
|
1437 |
break; |
|
1438 |
case EEXIST: |
|
1439 |
/* |
|
1440 |
* B_FALSE indicates this is not an |
|
1441 |
* an advertisement. |
|
1442 |
*/ |
|
1443 |
ndp_process(nnce, haddr, 0, B_FALSE); |
|
1444 |
NCE_REFRELE(nnce); |
|
1445 |
break; |
|
1446 |
default: |
|
1447 |
ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", |
|
1448 |
err)); |
|
1449 |
goto done; |
|
1450 |
} |
|
1451 |
flag |= NDP_SOLICITED; |
|
1452 |
} else { |
|
1453 |
/* |
|
1454 |
* This is a DAD req, multicast the advertisement |
|
1455 |
* to the all-nodes address. |
|
1456 |
*/ |
|
1457 |
src = ipv6_all_hosts_mcast; |
|
1458 |
} |
|
1459 |
if (our_nce->nce_flags & NCE_F_ISROUTER) |
|
1460 |
flag |= NDP_ISROUTER; |
|
1461 |
if (our_nce->nce_flags & NCE_F_PROXY) |
|
1462 |
flag &= ~NDP_ORIDE; |
|
1463 |
/* Response to a solicitation */ |
|
1464 |
(void) nce_xmit(ill, |
|
1465 |
ND_NEIGHBOR_ADVERT, |
|
1466 |
ill, /* ill to be used for extracting ill_nd_lla */ |
|
1467 |
B_TRUE, /* use ill_nd_lla */ |
|
1468 |
&target, /* Source and target of the advertisement pkt */ |
|
1469 |
&src, /* IP Destination (source of original pkt) */ |
|
1470 |
flag); |
|
1471 |
done: |
|
1472 |
if (bad_solicit) |
|
1473 |
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); |
|
1474 |
if (our_nce != NULL) |
|
1475 |
NCE_REFRELE(our_nce); |
|
1476 |
} |
|
1477 |
||
1478 |
void |
|
1479 |
ndp_input_advert(ill_t *ill, mblk_t *mp) |
|
1480 |
{ |
|
1481 |
nd_neighbor_advert_t *na; |
|
1482 |
uint32_t hlen = ill->ill_nd_lla_len; |
|
1483 |
uchar_t *haddr = NULL; |
|
1484 |
icmp6_t *icmp_nd; |
|
1485 |
ip6_t *ip6h; |
|
1486 |
nce_t *dst_nce = NULL; |
|
1487 |
in6_addr_t target; |
|
1488 |
nd_opt_hdr_t *opt = NULL; |
|
1489 |
int len; |
|
1490 |
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
|
1491 |
||
1492 |
ip6h = (ip6_t *)mp->b_rptr; |
|
1493 |
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
|
1494 |
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
|
1495 |
na = (nd_neighbor_advert_t *)icmp_nd; |
|
1496 |
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && |
|
1497 |
(na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { |
|
1498 |
ip1dbg(("ndp_input_advert: Target is multicast but the " |
|
1499 |
"solicited flag is not zero\n")); |
|
1500 |
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
|
1501 |
return; |
|
1502 |
} |
|
1503 |
target = na->nd_na_target; |
|
1504 |
if (IN6_IS_ADDR_MULTICAST(&target)) { |
|
1505 |
ip1dbg(("ndp_input_advert: Target is multicast!\n")); |
|
1506 |
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
|
1507 |
return; |
|
1508 |
} |
|
1509 |
if (len > sizeof (nd_neighbor_advert_t)) { |
|
1510 |
opt = (nd_opt_hdr_t *)&na[1]; |
|
1511 |
if (!ndp_verify_optlen(opt, |
|
1512 |
len - sizeof (nd_neighbor_advert_t))) { |
|
1513 |
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); |
|
1514 |
return; |
|
1515 |
} |
|
1516 |
/* At this point we have a verified NA per spec */ |
|
1517 |
len -= sizeof (nd_neighbor_advert_t); |
|
1518 |
opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); |
|
1519 |
if (opt != NULL) { |
|
1520 |
haddr = (uchar_t *)&opt[1]; |
|
1521 |
if (hlen > opt->nd_opt_len * 8 || |
|
1522 |
hlen == 0) { |
|
1523 |
BUMP_MIB(mib, |
|
1524 |
ipv6IfIcmpInBadNeighborAdvertisements); |
|
1525 |
return; |
|
1526 |
} |
|
1527 |
} |
|
1528 |
} |
|
1529 |
||
1530 |
/* |
|
1531 |
* If this interface is part of the group look at all the |
|
1532 |
* ills in the group. |
|
1533 |
*/ |
|
1534 |
rw_enter(&ill_g_lock, RW_READER); |
|
1535 |
if (ill->ill_group != NULL) |
|
1536 |
ill = ill->ill_group->illgrp_ill; |
|
1537 |
||
1538 |
for (; ill != NULL; ill = ill->ill_group_next) { |
|
1539 |
mutex_enter(&ill->ill_lock); |
|
1540 |
if (!ILL_CAN_LOOKUP(ill)) { |
|
1541 |
mutex_exit(&ill->ill_lock); |
|
1542 |
continue; |
|
1543 |
} |
|
1544 |
ill_refhold_locked(ill); |
|
1545 |
mutex_exit(&ill->ill_lock); |
|
1546 |
dst_nce = ndp_lookup(ill, &target, B_FALSE); |
|
1547 |
/* We have to drop the lock since ndp_process calls put* */ |
|
1548 |
rw_exit(&ill_g_lock); |
|
1549 |
if (dst_nce != NULL) { |
|
1550 |
if (na->nd_na_flags_reserved & |
|
1551 |
ND_NA_FLAG_ROUTER) { |
|
1552 |
dst_nce->nce_flags |= NCE_F_ISROUTER; |
|
1553 |
} |
|
1554 |
/* B_TRUE indicates this an advertisement */ |
|
1555 |
ndp_process(dst_nce, haddr, |
|
1556 |
na->nd_na_flags_reserved, B_TRUE); |
|
1557 |
NCE_REFRELE(dst_nce); |
|
1558 |
} |
|
1559 |
rw_enter(&ill_g_lock, RW_READER); |
|
1560 |
ill_refrele(ill); |
|
1561 |
} |
|
1562 |
rw_exit(&ill_g_lock); |
|
1563 |
} |
|
1564 |
||
1565 |
/* |
|
1566 |
* Process NDP neighbor solicitation/advertisement messages. |
|
1567 |
* The checksum has already checked o.k before reaching here. |
|
1568 |
*/ |
|
1569 |
void |
|
1570 |
ndp_input(ill_t *ill, mblk_t *mp) |
|
1571 |
{ |
|
1572 |
icmp6_t *icmp_nd; |
|
1573 |
ip6_t *ip6h; |
|
1574 |
int len; |
|
1575 |
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; |
|
1576 |
||
1577 |
||
1578 |
if (!pullupmsg(mp, -1)) { |
|
1579 |
ip1dbg(("ndp_input: pullupmsg failed\n")); |
|
1580 |
BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); |
|
1581 |
goto done; |
|
1582 |
} |
|
1583 |
ip6h = (ip6_t *)mp->b_rptr; |
|
1584 |
if (ip6h->ip6_hops != IPV6_MAX_HOPS) { |
|
1585 |
ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); |
|
1586 |
BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); |
|
1587 |
goto done; |
|
1588 |
} |
|
1589 |
/* |
|
1590 |
* NDP does not accept any extension headers between the |
|
1591 |
* IP header and the ICMP header since e.g. a routing |
|
1592 |
* header could be dangerous. |
|
1593 |
* This assumes that any AH or ESP headers are removed |
|
1594 |
* by ip prior to passing the packet to ndp_input. |
|
1595 |
*/ |
|
1596 |
if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { |
|
1597 |
ip1dbg(("ndp_input: Wrong next header 0x%x\n", |
|
1598 |
ip6h->ip6_nxt)); |
|
1599 |
BUMP_MIB(mib, ipv6IfIcmpInErrors); |
|
1600 |
goto done; |
|
1601 |
} |
|
1602 |
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); |
|
1603 |
ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || |
|
1604 |
icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); |
|
1605 |
if (icmp_nd->icmp6_code != 0) { |
|
1606 |
ip1dbg(("ndp_input: icmp6 code != 0 \n")); |
|
1607 |
BUMP_MIB(mib, ipv6IfIcmpInErrors); |
|
1608 |
goto done; |
|
1609 |
} |
|
1610 |
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; |
|
1611 |
/* |
|
1612 |
* Make sure packet length is large enough for either |
|
1613 |
* a NS or a NA icmp packet. |
|
1614 |
*/ |
|
1615 |
if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { |
|
1616 |
ip1dbg(("ndp_input: packet too short\n")); |
|
1617 |
BUMP_MIB(mib, ipv6IfIcmpInErrors); |
|
1618 |
goto done; |
|
1619 |
} |
|
1620 |
if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { |
|
1621 |
ndp_input_solicit(ill, mp); |
|
1622 |
} else { |
|
1623 |
ndp_input_advert(ill, mp); |
|
1624 |
} |
|
1625 |
done: |
|
1626 |
freemsg(mp); |
|
1627 |
} |
|
1628 |
||
1629 |
/* |
|
1630 |
* nce_xmit is called to form and transmit a ND solicitation or |
|
1631 |
* advertisement ICMP packet. |
|
1632 |
* If source address is unspecified, appropriate source address |
|
1633 |
* and link layer address will be chosen here. This function |
|
1634 |
* *always* sends the link layer option. |
|
1635 |
* It returns B_FALSE only if it does a successful put() to the |
|
1636 |
* corresponding ill's ill_wq otherwise returns B_TRUE. |
|
1637 |
*/ |
|
1638 |
static boolean_t |
|
1639 |
nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, |
|
1640 |
boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, |
|
1641 |
int flag) |
|
1642 |
{ |
|
1643 |
uint32_t len; |
|
1644 |
icmp6_t *icmp6; |
|
1645 |
mblk_t *mp; |
|
1646 |
ip6_t *ip6h; |
|
1647 |
nd_opt_hdr_t *opt; |
|
1648 |
uint_t plen; |
|
1649 |
ip6i_t *ip6i; |
|
1650 |
ipif_t *src_ipif = NULL; |
|
1651 |
||
1652 |
/* |
|
1653 |
* If we have a unspecified source(sender) address, select a |
|
1654 |
* proper source address for the solicitation here itself so |
|
1655 |
* that we can initialize the h/w address correctly. This is |
|
1656 |
* needed for interface groups as source address can come from |
|
1657 |
* the whole group and the h/w address initialized from ill will |
|
1658 |
* be wrong if the source address comes from a different ill. |
|
1659 |
* |
|
1660 |
* Note that the NA never comes here with the unspecified source |
|
1661 |
* address. The following asserts that whenever the source |
|
1662 |
* address is specified, the haddr also should be specified. |
|
1663 |
*/ |
|
1664 |
ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); |
|
1665 |
||
1666 |
if (IN6_IS_ADDR_UNSPECIFIED(sender)) { |
|
1667 |
ASSERT(operation != ND_NEIGHBOR_ADVERT); |
|
1668 |
/* |
|
1669 |
* Pick a source address for this solicitation, but |
|
1670 |
* restrict the selection to addresses assigned to the |
|
1671 |
* output interface (or interface group). We do this |
|
1672 |
* because the destination will create a neighbor cache |
|
1673 |
* entry for the source address of this packet, so the |
|
1674 |
* source address had better be a valid neighbor. |
|
1675 |
*/ |
|
1676 |
src_ipif = ipif_select_source_v6(ill, target, B_TRUE, |
|
1677 |
IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); |
|
1678 |
if (src_ipif == NULL) { |
|
1679 |
char buf[INET6_ADDRSTRLEN]; |
|
1680 |
||
1681 |
ip0dbg(("nce_xmit: No source ipif for dst %s\n", |
|
1682 |
inet_ntop(AF_INET6, (char *)target, buf, |
|
1683 |
sizeof (buf)))); |
|
1684 |
return (B_TRUE); |
|
1685 |
} |
|
1686 |
sender = &src_ipif->ipif_v6src_addr; |
|
1687 |
hwaddr_ill = src_ipif->ipif_ill; |
|
1688 |
} |
|
1689 |
||
1690 |
plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; |
|
1691 |
/* |
|
1692 |
* Always make sure that the NS/NA packets don't get load |
|
1693 |
* spread. This is needed so that the probe packets sent |
|
1694 |
* by the in.mpathd daemon can really go out on the desired |
|
1695 |
* interface. Probe packets are made to go out on a desired |
|
1696 |
* interface by including a ip6i with ATTACH_IF flag. As these |
|
1697 |
* packets indirectly end up sending/receiving NS/NA packets |
|
1698 |
* (neighbor doing NUD), we have to make sure that NA |
|
1699 |
* also go out on the same interface. |
|
1700 |
*/ |
|
1701 |
len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + |
|
1702 |
plen * 8; |
|
1703 |
mp = allocb(len, BPRI_LO); |
|
1704 |
if (mp == NULL) { |
|
1705 |
if (src_ipif != NULL) |
|
1706 |
ipif_refrele(src_ipif); |
|
1707 |
return (B_TRUE); |
|
1708 |
} |
|
1709 |
bzero((char *)mp->b_rptr, len); |
|
1710 |
mp->b_wptr = mp->b_rptr + len; |
|
1711 |
||
1712 |
ip6i = (ip6i_t *)mp->b_rptr; |
|
1713 |
ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; |
|
1714 |
ip6i->ip6i_nxt = IPPROTO_RAW; |
|
1715 |
ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; |
|
1716 |
ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; |
|
1717 |
||
1718 |
ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); |
|
1719 |
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; |
|
1720 |
ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); |
|
1721 |
ip6h->ip6_nxt = IPPROTO_ICMPV6; |
|
1722 |
ip6h->ip6_hops = IPV6_MAX_HOPS; |
|
1723 |
ip6h->ip6_dst = *target; |
|
1724 |
icmp6 = (icmp6_t *)&ip6h[1]; |
|
1725 |
||
1726 |
opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + |
|
1727 |
sizeof (nd_neighbor_advert_t)); |
|
1728 |
||
1729 |
if (operation == ND_NEIGHBOR_SOLICIT) { |
|
1730 |
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; |
|
1731 |
||
1732 |
opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; |
|
1733 |
ip6h->ip6_src = *sender; |
|
1734 |
ns->nd_ns_target = *target; |
|
1735 |
if (!(flag & NDP_UNICAST)) { |
|
1736 |
/* Form multicast address of the target */ |
|
1737 |
ip6h->ip6_dst = ipv6_solicited_node_mcast; |
|
1738 |
ip6h->ip6_dst.s6_addr32[3] |= |
|
1739 |
ns->nd_ns_target.s6_addr32[3]; |
|
1740 |
} |
|
1741 |
} else { |
|
1742 |
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; |
|
1743 |
||
1744 |
opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; |
|
1745 |
ip6h->ip6_src = *sender; |
|
1746 |
na->nd_na_target = *sender; |
|
1747 |
if (flag & NDP_ISROUTER) |
|
1748 |
na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; |
|
1749 |
if (flag & NDP_SOLICITED) |
|
1750 |
na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; |
|
1751 |
if (flag & NDP_ORIDE) |
|
1752 |
na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; |
|
1753 |
||
1754 |
} |
|
1755 |
/* Fill in link layer address and option len */ |
|
1756 |
opt->nd_opt_len = (uint8_t)plen; |
|
1757 |
mutex_enter(&hwaddr_ill->ill_lock); |
|
1758 |
bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, |
|
1759 |
&opt[1], hwaddr_ill->ill_nd_lla_len); |
|
1760 |
mutex_exit(&hwaddr_ill->ill_lock); |
|
1761 |
icmp6->icmp6_type = (uint8_t)operation; |
|
1762 |
icmp6->icmp6_code = 0; |
|
1763 |
/* |
|
1764 |
* Prepare for checksum by putting icmp length in the icmp |
|
1765 |
* checksum field. The checksum is calculated in ip_wput_v6. |
|
1766 |
*/ |
|
1767 |
icmp6->icmp6_cksum = ip6h->ip6_plen; |
|
1768 |
||
1769 |
if (src_ipif != NULL) |
|
1770 |
ipif_refrele(src_ipif); |
|
1771 |
if (canput(ill->ill_wq)) { |
|
1772 |
put(ill->ill_wq, mp); |
|
1773 |
return (B_FALSE); |
|
1774 |
} |
|
1775 |
freemsg(mp); |
|
1776 |
return (B_TRUE); |
|
1777 |
} |
|
1778 |
||
1779 |
/* |
|
1780 |
* Make a link layer address (does not include the SAP) from an nce. |
|
1781 |
* To form the link layer address, use the last four bytes of ipv6 |
|
1782 |
* address passed in and the fixed offset stored in nce. |
|
1783 |
*/ |
|
1784 |
static void |
|
1785 |
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) |
|
1786 |
{ |
|
1787 |
uchar_t *mask, *to; |
|
1788 |
ill_t *ill = nce->nce_ill; |
|
1789 |
int len; |
|
1790 |
||
1791 |
if (ill->ill_net_type == IRE_IF_NORESOLVER) |
|
1792 |
return; |
|
1793 |
ASSERT(nce->nce_res_mp != NULL); |
|
1794 |
ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); |
|
1795 |
ASSERT(nce->nce_flags & NCE_F_MAPPING); |
|
1796 |
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); |
|
1797 |
ASSERT(addr != NULL); |
|
1798 |
bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), |
|
1799 |
addrpos, ill->ill_nd_lla_len); |
|
1800 |
len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, |
|
1801 |
IPV6_ADDR_LEN); |
|
1802 |
mask = (uchar_t *)&nce->nce_extract_mask; |
|
1803 |
mask += (IPV6_ADDR_LEN - len); |
|
1804 |
addr += (IPV6_ADDR_LEN - len); |
|
1805 |
to = addrpos + nce->nce_ll_extract_start; |
|
1806 |
while (len-- > 0) |
|
1807 |
*to++ |= *mask++ & *addr++; |
|
1808 |
} |
|
1809 |
||
1810 |
/* |
|
1811 |
* Pass a cache report back out via NDD. |
|
1812 |
*/ |
|
1813 |
/* ARGSUSED */ |
|
1814 |
int |
|
1815 |
ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) |
|
1816 |
{ |
|
1817 |
(void) mi_mpprintf(mp, "ifname hardware addr flags" |
|
1818 |
" proto addr/mask"); |
|
1819 |
ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); |
|
1820 |
return (0); |
|
1821 |
} |
|
1822 |
||
1823 |
/*
 * Convert a link level address of arbitrary length to an ascii string
 * of colon separated, lower case, two digit hex bytes ("xx:xx:...:xx").
 *
 *	lla	- the address bytes to format
 *	addrlen	- number of bytes in lla
 *	buf	- where the string is written
 *
 * The caller *must* have already verified that the string buffer
 * is large enough to hold the entire string, including the trailing
 * NUL: 3 * addrlen bytes (two digits and a separator per byte, the
 * last separator being replaced by the terminator), and at least one
 * byte when addrlen is not positive.
 *
 * A zero or negative addrlen yields the empty string.
 */
static void
lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf)
{
	static const char hexdigits[] = "0123456789abcdef";
	int i;

	if (addrlen <= 0) {
		/*
		 * Nothing to format.  Return here so that the terminator
		 * is never written in front of the caller's buffer.
		 */
		buf[0] = '\0';
		return;
	}

	for (i = 0; i < addrlen; i++) {
		*buf++ = hexdigits[lla[i] >> 4];
		*buf++ = hexdigits[lla[i] & 0x0f];
		*buf++ = ':';
	}
	/* Replace the trailing separator with the terminator. */
	buf[-1] = '\0';
}
|
1846 |
||
1847 |
/* |
|
1848 |
* Add a single line to the NDP Cache Entry Report. |
|
1849 |
*/ |
|
1850 |
static void |
|
1851 |
nce_report1(nce_t *nce, uchar_t *mp_arg) |
|
1852 |
{ |
|
1853 |
ill_t *ill = nce->nce_ill; |
|
1854 |
char local_buf[INET6_ADDRSTRLEN]; |
|
1855 |
uchar_t flags_buf[10]; |
|
1856 |
uint32_t flags = nce->nce_flags; |
|
1857 |
mblk_t *mp = (mblk_t *)mp_arg; |
|
1858 |
uchar_t *h; |
|
1859 |
uchar_t *m = flags_buf; |
|
1860 |
in6_addr_t v6addr; |
|
1861 |
||
1862 |
/* |
|
1863 |
* Lock the nce to protect nce_res_mp from being changed |
|
1864 |
* if an external resolver address resolution completes |
|
1865 |
* while nce_res_mp is being accessed here. |
|
1866 |
* |
|
1867 |
* Deal with all address formats, not just Ethernet-specific |
|
1868 |
* In addition, make sure that the mblk has enough space |
|
1869 |
* before writing to it. If is doesn't, allocate a new one. |
|
1870 |
*/ |
|
1871 |
ASSERT(ill != NULL); |
|
1872 |
v6addr = nce->nce_mask; |
|
1873 |
if (flags & NCE_F_PERMANENT) |
|
1874 |
*m++ = 'P'; |
|
1875 |
if (flags & NCE_F_ISROUTER) |
|
1876 |
*m++ = 'R'; |
|
1877 |
if (flags & NCE_F_MAPPING) |
|
1878 |
*m++ = 'M'; |
|
1879 |
*m = '\0'; |
|
1880 |
||
1881 |
if (ill->ill_net_type == IRE_IF_RESOLVER) { |
|
1882 |
size_t addrlen; |
|
1883 |
uchar_t *addr_buf; |
|
1884 |
dl_unitdata_req_t *dl; |
|
1885 |
||
1886 |
mutex_enter(&nce->nce_lock); |
|
1887 |
h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); |
|
1888 |
dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; |
|
1889 |
if (ill->ill_flags & ILLF_XRESOLV) |
|
1890 |
addrlen = (3 * (dl->dl_dest_addr_length)); |
|
1891 |
else |
|
1892 |
addrlen = (3 * (ill->ill_nd_lla_len)); |
|
1893 |
if (addrlen <= 0) { |
|
1894 |
mutex_exit(&nce->nce_lock); |
|
1895 |
(void) mi_mpprintf(mp, |
|
1896 |
"%8s %9s %5s %s/%d", |
|
1897 |
ill->ill_name, |
|
1898 |
"None", |
|
1899 |
(uchar_t *)&flags_buf, |
|
1900 |
inet_ntop(AF_INET6, (char *)&nce->nce_addr, |
|
1901 |
(char *)local_buf, sizeof (local_buf)), |
|
1902 |
ip_mask_to_plen_v6(&v6addr)); |
|
1903 |
} else { |
|
1904 |
/* |
|
1905 |
* Convert the hardware/lla address to ascii |
|
1906 |
*/ |
|
1907 |
addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); |
|
1908 |
if (addr_buf == NULL) { |
|
1909 |
mutex_exit(&nce->nce_lock); |
|
1910 |
return; |
|
1911 |
} |
|
1912 |
if (ill->ill_flags & ILLF_XRESOLV) |
|
1913 |
lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, |
|
1914 |
addr_buf); |
|
1915 |
else |
|
1916 |
lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, |
|
1917 |
addr_buf); |
|
1918 |
mutex_exit(&nce->nce_lock); |
|
1919 |
(void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", |
|
1920 |
ill->ill_name, addr_buf, (uchar_t *)&flags_buf, |
|
1921 |
inet_ntop(AF_INET6, (char *)&nce->nce_addr, |
|
1922 |
(char *)local_buf, sizeof (local_buf)), |
|
1923 |
ip_mask_to_plen_v6(&v6addr)); |
|
1924 |
kmem_free(addr_buf, addrlen); |
|
1925 |
} |
|
1926 |
} else { |
|
1927 |
(void) mi_mpprintf(mp, |
|
1928 |
"%8s %9s %5s %s/%d", |
|
1929 |
ill->ill_name, |
|
1930 |
"None", |
|
1931 |
(uchar_t *)&flags_buf, |
|
1932 |
inet_ntop(AF_INET6, (char *)&nce->nce_addr, |
|
1933 |
(char *)local_buf, sizeof (local_buf)), |
|
1934 |
ip_mask_to_plen_v6(&v6addr)); |
|
1935 |
} |
|
1936 |
} |
|
1937 |
||
1938 |
mblk_t * |
|
1939 |
nce_udreq_alloc(ill_t *ill) |
|
1940 |
{ |
|
1941 |
mblk_t *template_mp = NULL; |
|
1942 |
dl_unitdata_req_t *dlur; |
|
1943 |
int sap_length; |
|
1944 |
||
1945 |
sap_length = ill->ill_sap_length; |
|
1946 |
template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + |
|
1947 |
ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); |
|
1948 |
if (template_mp == NULL) |
|
1949 |
return (NULL); |
|
1950 |
||
1951 |
dlur = (dl_unitdata_req_t *)template_mp->b_rptr; |
|
1952 |
dlur->dl_priority.dl_min = 0; |
|
1953 |
dlur->dl_priority.dl_max = 0; |
|
1954 |
dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; |
|
1955 |
dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); |
|
1956 |
||
1957 |
/* Copy in the SAP value. */ |
|
1958 |
NCE_LL_SAP_COPY(ill, template_mp); |
|
1959 |
||
1960 |
return (template_mp); |
|
1961 |
} |
|
1962 |
||
1963 |
/* |
|
1964 |
* NDP retransmit timer. |
|
1965 |
* This timer goes off when: |
|
1966 |
* a. It is time to retransmit NS for resolver. |
|
1967 |
* b. It is time to send reachability probes. |
|
1968 |
*/ |
|
1969 |
void |
|
1970 |
ndp_timer(void *arg) |
|
1971 |
{ |
|
1972 |
nce_t *nce = arg; |
|
1973 |
ill_t *ill = nce->nce_ill; |
|
1974 |
uint32_t ms; |
|
1975 |
char addrbuf[INET6_ADDRSTRLEN]; |
|
1976 |
mblk_t *mp; |
|
1977 |
boolean_t dropped = B_FALSE; |
|
1978 |
||
1979 |
/* |
|
1980 |
* The timer has to be cancelled by ndp_delete before doing the final |
|
1981 |
* refrele. So the NCE is guaranteed to exist when the timer runs |
|
1982 |
* until it clears the timeout_id. Before clearing the timeout_id |
|
1983 |
* bump up the refcnt so that we can continue to use the nce |
|
1984 |
*/ |
|
1985 |
ASSERT(nce != NULL); |
|
1986 |
||
1987 |
/* |
|
1988 |
* Grab the ill_g_lock now itself to avoid lock order problems. |
|
1989 |
* nce_solicit needs ill_g_lock to be able to traverse ills |
|
1990 |
*/ |
|
1991 |
rw_enter(&ill_g_lock, RW_READER); |
|
1992 |
mutex_enter(&nce->nce_lock); |
|
1993 |
NCE_REFHOLD_LOCKED(nce); |
|
1994 |
nce->nce_timeout_id = 0; |
|
1995 |
||
1996 |
/* |
|
1997 |
* Check the reachability state first. |
|
1998 |
*/ |
|
1999 |
switch (nce->nce_state) { |
|
2000 |
case ND_DELAY: |
|
2001 |
rw_exit(&ill_g_lock); |
|
2002 |
nce->nce_state = ND_PROBE; |
|
2003 |
mutex_exit(&nce->nce_lock); |
|
2004 |
(void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, |
|
2005 |
&ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); |
|
2006 |
if (ip_debug > 3) { |
|
2007 |
/* ip2dbg */ |
|
2008 |
pr_addr_dbg("ndp_timer: state for %s changed " |
|
2009 |
"to PROBE\n", AF_INET6, &nce->nce_addr); |
|
2010 |
} |
|
2011 |
NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); |
|
2012 |
NCE_REFRELE(nce); |
|
2013 |
return; |
|
2014 |
case ND_PROBE: |
|
2015 |
/* must be retransmit timer */ |
|
2016 |
rw_exit(&ill_g_lock); |
|
2017 |
nce->nce_pcnt--; |
|
2018 |
ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && |
|
2019 |
nce->nce_pcnt >= -1); |
|
2020 |
if (nce->nce_pcnt == 0) { |
|
2021 |
/* Wait RetransTimer, before deleting the entry */ |
|
2022 |
ip2dbg(("ndp_timer: pcount=%x dst %s\n", |
|
2023 |
nce->nce_pcnt, inet_ntop(AF_INET6, |
|
2024 |
&nce->nce_addr, addrbuf, sizeof (addrbuf)))); |
|
2025 |
mutex_exit(&nce->nce_lock); |
|
2026 |
NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); |
|
2027 |
} else { |
|
2028 |
/* |
|
2029 |
* As per RFC2461, the nce gets deleted after |
|
2030 |
* MAX_UNICAST_SOLICIT unsuccessful re-transmissions. |
|
2031 |
* Note that the first unicast solicitation is sent |
|
2032 |
* during the DELAY state. |
|
2033 |
*/ |
|
2034 |
if (nce->nce_pcnt > 0) { |
|
2035 |
ip2dbg(("ndp_timer: pcount=%x dst %s\n", |
|
2036 |
nce->nce_pcnt, inet_ntop(AF_INET6, |
|
2037 |
&nce->nce_addr, |
|
2038 |
addrbuf, sizeof (addrbuf)))); |
|
2039 |
mutex_exit(&nce->nce_lock); |
|
2040 |
dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, |
|
2041 |
NULL, B_FALSE, &ipv6_all_zeros, |
|
2042 |
&nce->nce_addr, NDP_UNICAST); |
|
2043 |
if (dropped) { |
|
2044 |
mutex_enter(&nce->nce_lock); |
|
2045 |
nce->nce_pcnt++; |
|
2046 |
mutex_exit(&nce->nce_lock); |
|
2047 |
} |
|
2048 |
NDP_RESTART_TIMER(nce, |
|
2049 |
ill->ill_reachable_retrans_time); |
|
2050 |
} else { |
|
2051 |
/* No hope, delete the nce */ |
|
2052 |
nce->nce_state = ND_UNREACHABLE; |
|
2053 |
mutex_exit(&nce->nce_lock); |
|
2054 |
if (ip_debug > 2) { |
|
2055 |
/* ip1dbg */ |
|
2056 |
pr_addr_dbg("ndp_timer: Delete IRE for" |
|
2057 |
" dst %s\n", AF_INET6, |
|
2058 |
&nce->nce_addr); |
|
2059 |
} |
|
2060 |
ndp_delete(nce); |
|
2061 |
} |
|
2062 |
} |
|
2063 |
NCE_REFRELE(nce); |
|
2064 |
return; |
|
2065 |
case ND_INCOMPLETE: |
|
2066 |
/* |
|
2067 |
* Must be resolvers retransmit timer. |
|
2068 |
*/ |
|
2069 |
for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { |
|
2070 |
ip6i_t *ip6i; |
|
2071 |
ip6_t *ip6h; |
|
2072 |
mblk_t *data_mp; |
|
2073 |
||
2074 |
/* |
|
2075 |
* Walk the list of packets queued, and see if there |
|
2076 |
* are any multipathing probe packets. Such packets |
|
2077 |
* are always queued at the head. Since this is a |
|
2078 |
* retransmit timer firing, mark such packets as |
|
2079 |
* delayed in ND resolution. This info will be used |
|
2080 |
* in ip_wput_v6(). Multipathing probe packets will |
|
2081 |
* always have an ip6i_t. Once we hit a packet without |
|
2082 |
* it, we can break out of this loop. |
|
2083 |
*/ |
|
2084 |
if (mp->b_datap->db_type == M_CTL) |
|
2085 |
data_mp = mp->b_cont; |
|
2086 |
else |
|
2087 |
data_mp = mp; |
|
2088 |
||
2089 |
ip6h = (ip6_t *)data_mp->b_rptr; |
|
2090 |
if (ip6h->ip6_nxt != IPPROTO_RAW) |
|
2091 |
break; |
|
2092 |
||
2093 |
/* |
|
2094 |
* This message should have been pulled up already in |
|
2095 |
* ip_wput_v6. We can't do pullups here because the |
|
2096 |
* b_next/b_prev is non-NULL. |
|
2097 |
*/ |
|
2098 |
ip6i = (ip6i_t *)ip6h; |
|
2099 |
ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= |
|
2100 |
sizeof (ip6i_t) + IPV6_HDR_LEN); |
|
2101 |
||
2102 |
/* Mark this packet as delayed due to ND resolution */ |
|
2103 |
if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) |
|
2104 |
ip6i->ip6i_flags |= IP6I_ND_DELAYED; |
|
2105 |
} |
|
2106 |
if (nce->nce_qd_mp != NULL) { |
|
2107 |
ms = nce_solicit(nce, NULL); |
|
2108 |
rw_exit(&ill_g_lock); |
|
2109 |
if (ms == 0) { |
|
2110 |
if (nce->nce_state != ND_REACHABLE) { |
|
2111 |
mutex_exit(&nce->nce_lock); |
|
2112 |
nce_resolv_failed(nce); |
|
2113 |
ndp_delete(nce); |
|
2114 |
} else { |
|
2115 |
mutex_exit(&nce->nce_lock); |
|
2116 |
} |
|
2117 |
} else { |
|
2118 |
mutex_exit(&nce->nce_lock); |
|
2119 |
NDP_RESTART_TIMER(nce, (clock_t)ms); |
|
2120 |
} |
|
2121 |
NCE_REFRELE(nce); |
|
2122 |
return; |
|
2123 |
} |
|
2124 |
mutex_exit(&nce->nce_lock); |
|
2125 |
rw_exit(&ill_g_lock); |
|
2126 |
NCE_REFRELE(nce); |
|
2127 |
break; |
|
2128 |
case ND_REACHABLE : |
|
2129 |
rw_exit(&ill_g_lock); |
|
2130 |
if (nce->nce_flags & NCE_F_UNSOL_ADV && |
|
2131 |
nce->nce_unsolicit_count != 0) { |
|
2132 |
nce->nce_unsolicit_count--; |
|
2133 |
mutex_exit(&nce->nce_lock); |
|
2134 |
dropped = nce_xmit(ill, |
|
2135 |
ND_NEIGHBOR_ADVERT, |
|
2136 |
ill, /* ill to be used for hw addr */ |
|
2137 |
B_FALSE, /* use ill_phys_addr */ |
|
2138 |
&nce->nce_addr, |
|
2139 |
&ipv6_all_hosts_mcast, |
|
2140 |
nce->nce_flags | NDP_ORIDE); |
|
2141 |
if (dropped) { |
|
2142 |
mutex_enter(&nce->nce_lock); |
|
2143 |
nce->nce_unsolicit_count++; |
|
2144 |
mutex_exit(&nce->nce_lock); |
|
2145 |
} |
|
2146 |
if (nce->nce_unsolicit_count != 0) { |
|
2147 |
NDP_RESTART_TIMER(nce, |
|
2148 |
ip_ndp_unsolicit_interval); |
|
2149 |
} |
|
2150 |
} else { |
|
2151 |
mutex_exit(&nce->nce_lock); |
|
2152 |
} |
|
2153 |
NCE_REFRELE(nce); |
|
2154 |
break; |
|
2155 |
default: |
|
2156 |
rw_exit(&ill_g_lock); |
|
2157 |
mutex_exit(&nce->nce_lock); |
|
2158 |
NCE_REFRELE(nce); |
|
2159 |
break; |
|
2160 |
} |
|
2161 |
} |
|
2162 |
||
2163 |
/* |
|
2164 |
* Set a link layer address from the ll_addr passed in. |
|
2165 |
* Copy SAP from ill. |
|
2166 |
*/ |
|
2167 |
static void |
|
2168 |
nce_set_ll(nce_t *nce, uchar_t *ll_addr) |
|
2169 |
{ |
|
2170 |
ill_t *ill = nce->nce_ill; |
|
2171 |
uchar_t *woffset; |
|
2172 |
||
2173 |
ASSERT(ll_addr != NULL); |
|
2174 |
/* Always called before fast_path_probe */ |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
2175 |
ASSERT(nce->nce_fp_mp == NULL); |
0 | 2176 |
if (ill->ill_sap_length != 0) { |
2177 |
/* |
|
2178 |
* Copy the SAP type specified in the |
|
2179 |
* request into the xmit template. |
|
2180 |
*/ |
|
2181 |
NCE_LL_SAP_COPY(ill, nce->nce_res_mp); |
|
2182 |
} |
|
2183 |
if (ill->ill_phys_addr_length > 0) { |
|
2184 |
/* |
|
2185 |
* The bcopy() below used to be called for the physical address |
|
2186 |
* length rather than the link layer address length. For |
|
2187 |
* ethernet and many other media, the phys_addr and lla are |
|
2188 |
* identical. |
|
2189 |
* However, with xresolv interfaces being introduced, the |
|
2190 |
* phys_addr and lla are no longer the same, and the physical |
|
2191 |
* address may not have any useful meaning, so we use the lla |
|
2192 |
* for IPv6 address resolution and destination addressing. |
|
2193 |
* |
|
2194 |
* For PPP or other interfaces with a zero length |
|
2195 |
* physical address, don't do anything here. |
|
2196 |
* The bcopy() with a zero phys_addr length was previously |
|
2197 |
* a no-op for interfaces with a zero-length physical address. |
|
2198 |
* Using the lla for them would change the way they operate. |
|
2199 |
* Doing nothing in such cases preserves expected behavior. |
|
2200 |
*/ |
|
2201 |
woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); |
|
2202 |
bcopy(ll_addr, woffset, ill->ill_nd_lla_len); |
|
2203 |
} |
|
2204 |
} |
|
2205 |
||
2206 |
static boolean_t |
|
2207 |
nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) |
|
2208 |
{ |
|
2209 |
ill_t *ill = nce->nce_ill; |
|
2210 |
uchar_t *ll_offset; |
|
2211 |
||
2212 |
ASSERT(nce->nce_res_mp != NULL); |
|
2213 |
if (ll_addr == NULL) |
|
2214 |
return (B_FALSE); |
|
2215 |
ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); |
|
2216 |
if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) |
|
2217 |
return (B_TRUE); |
|
2218 |
return (B_FALSE); |
|
2219 |
} |
|
2220 |
||
2221 |
/* |
|
2222 |
* Updates the link layer address or the reachability state of |
|
2223 |
* a cache entry. Reset probe counter if needed. |
|
2224 |
*/ |
|
2225 |
static void |
|
2226 |
nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) |
|
2227 |
{ |
|
2228 |
ill_t *ill = nce->nce_ill; |
|
2229 |
boolean_t need_stop_timer = B_FALSE; |
|
2230 |
boolean_t need_fastpath_update = B_FALSE; |
|
2231 |
||
2232 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
2233 |
/* |
|
2234 |
* If this interface does not do NUD, there is no point |
|
2235 |
* in allowing an update to the cache entry. Although |
|
2236 |
* we will respond to NS. |
|
2237 |
* The only time we accept an update for a resolver when |
|
2238 |
* NUD is turned off is when it has just been created. |
|
2239 |
* Non-Resolvers will always be created as REACHABLE. |
|
2240 |
*/ |
|
2241 |
if (new_state != ND_UNCHANGED) { |
|
2242 |
if ((nce->nce_flags & NCE_F_NONUD) && |
|
2243 |
(nce->nce_state != ND_INCOMPLETE)) |
|
2244 |
return; |
|
2245 |
ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); |
|
2246 |
ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); |
|
2247 |
need_stop_timer = B_TRUE; |
|
2248 |
if (new_state == ND_REACHABLE) |
|
2249 |
nce->nce_last = TICK_TO_MSEC(lbolt64); |
|
2250 |
else { |
|
2251 |
/* We force NUD in this case */ |
|
2252 |
nce->nce_last = 0; |
|
2253 |
} |
|
2254 |
nce->nce_state = new_state; |
|
2255 |
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; |
|
2256 |
} |
|
2257 |
/* |
|
2258 |
* In case of fast path we need to free the the fastpath |
|
2259 |
* M_DATA and do another probe. Otherwise we can just |
|
2260 |
* overwrite the DL_UNITDATA_REQ data, noting we'll lose |
|
2261 |
* whatever packets that happens to be transmitting at the time. |
|
2262 |
*/ |
|
2263 |
if (new_ll_addr != NULL) { |
|
2264 |
ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + |
|
2265 |
ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); |
|
2266 |
bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + |
|
2267 |
NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); |
|
2268 |
if (nce->nce_fp_mp != NULL) { |
|
2269 |
freemsg(nce->nce_fp_mp); |
|
2270 |
nce->nce_fp_mp = NULL; |
|
2271 |
} |
|
741
40027a3621ac
PSARC 2005/082 Yosemite: UDP Performance Enhancement
masputra
parents:
0
diff
changeset
|
2272 |
need_fastpath_update = B_TRUE; |
0 | 2273 |
} |
2274 |
mutex_exit(&nce->nce_lock); |
|
2275 |
if (need_stop_timer) { |
|
2276 |
(void) untimeout(nce->nce_timeout_id); |
|
2277 |
nce->nce_timeout_id = 0; |
|
2278 |
} |
|
2279 |
if (need_fastpath_update) |
|
2280 |
nce_fastpath(nce); |
|
2281 |
mutex_enter(&nce->nce_lock); |
|
2282 |
} |
|
2283 |
||
2284 |
static void |
|
2285 |
nce_queue_mp(nce_t *nce, mblk_t *mp) |
|
2286 |
{ |
|
2287 |
uint_t count = 0; |
|
2288 |
mblk_t **mpp; |
|
2289 |
boolean_t head_insert = B_FALSE; |
|
2290 |
ip6_t *ip6h; |
|
2291 |
ip6i_t *ip6i; |
|
2292 |
mblk_t *data_mp; |
|
2293 |
||
2294 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
2295 |
||
2296 |
if (mp->b_datap->db_type == M_CTL) |
|
2297 |
data_mp = mp->b_cont; |
|
2298 |
else |
|
2299 |
data_mp = mp; |
|
2300 |
ip6h = (ip6_t *)data_mp->b_rptr; |
|
2301 |
if (ip6h->ip6_nxt == IPPROTO_RAW) { |
|
2302 |
/* |
|
2303 |
* This message should have been pulled up already in |
|
2304 |
* ip_wput_v6. We can't do pullups here because the message |
|
2305 |
* could be from the nce_qd_mp which could have b_next/b_prev |
|
2306 |
* non-NULL. |
|
2307 |
*/ |
|
2308 |
ip6i = (ip6i_t *)ip6h; |
|
2309 |
ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= |
|
2310 |
sizeof (ip6i_t) + IPV6_HDR_LEN); |
|
2311 |
/* |
|
2312 |
* Multipathing probe packets have IP6I_DROP_IFDELAYED set. |
|
2313 |
* This has 2 aspects mentioned below. |
|
2314 |
* 1. Perform head insertion in the nce_qd_mp for these packets. |
|
2315 |
* This ensures that next retransmit of ND solicitation |
|
2316 |
* will use the interface specified by the probe packet, |
|
2317 |
* for both NS and NA. This corresponds to the src address |
|
2318 |
* in the IPv6 packet. If we insert at tail, we will be |
|
2319 |
* depending on the packet at the head for successful |
|
2320 |
* ND resolution. This is not reliable, because the interface |
|
2321 |
* on which the NA arrives could be different from the interface |
|
2322 |
* on which the NS was sent, and if the receiving interface is |
|
2323 |
* failed, it will appear that the sending interface is also |
|
2324 |
* failed, causing in.mpathd to misdiagnose this as link |
|
2325 |
* failure. |
|
2326 |
* 2. Drop the original packet, if the ND resolution did not |
|
2327 |
* succeed in the first attempt. However we will create the |
|
2328 |
* nce and the ire, as soon as the ND resolution succeeds. |
|
2329 |
* We don't gain anything by queueing multiple probe packets |
|
2330 |
* and sending them back-to-back once resolution succeeds. |
|
2331 |
* It is sufficient to send just 1 packet after ND resolution |
|
2332 |
* succeeds. Since mpathd is sending down probe packets at a |
|
2333 |
* constant rate, we don't need to send the queued packet. We |
|
2334 |
* need to queue it only for NDP resolution. The benefit of |
|
2335 |
* dropping the probe packets that were delayed in ND |
|
2336 |
* resolution, is that in.mpathd will not see inflated |
|
2337 |
* RTT. If the ND resolution does not succeed within |
|
2338 |
* in.mpathd's failure detection time, mpathd may detect |
|
2339 |
* a failure, and it does not matter whether the packet |
|
2340 |
* was queued or dropped. |
|
2341 |
*/ |
|
2342 |
if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) |
|
2343 |
head_insert = B_TRUE; |
|
2344 |
} |
|
2345 |
||
2346 |
for (mpp = &nce->nce_qd_mp; *mpp != NULL; |
|
2347 |
mpp = &(*mpp)->b_next) { |
|
2348 |
if (++count > |
|
2349 |
nce->nce_ill->ill_max_buf) { |
|
2350 |
mblk_t *tmp = nce->nce_qd_mp->b_next; |
|
2351 |
||
2352 |
nce->nce_qd_mp->b_next = NULL; |
|
2353 |
nce->nce_qd_mp->b_prev = NULL; |
|
2354 |
freemsg(nce->nce_qd_mp); |
|
2355 |
ip1dbg(("nce_queue_mp: pkt dropped\n")); |
|
2356 |
nce->nce_qd_mp = tmp; |
|
2357 |
} |
|
2358 |
} |
|
2359 |
/* put this on the list */ |
|
2360 |
if (head_insert) { |
|
2361 |
mp->b_next = nce->nce_qd_mp; |
|
2362 |
nce->nce_qd_mp = mp; |
|
2363 |
} else { |
|
2364 |
*mpp = mp; |
|
2365 |
} |
|
2366 |
} |
|
2367 |
||
2368 |
/* |
|
2369 |
* Called when address resolution failed due to a timeout. |
|
2370 |
* Send an ICMP unreachable in response to all queued packets. |
|
2371 |
*/ |
|
2372 |
void |
|
2373 |
nce_resolv_failed(nce_t *nce) |
|
2374 |
{ |
|
2375 |
mblk_t *mp, *nxt_mp, *first_mp; |
|
2376 |
char buf[INET6_ADDRSTRLEN]; |
|
2377 |
ip6_t *ip6h; |
|
2378 |
zoneid_t zoneid = GLOBAL_ZONEID; |
|
2379 |
||
2380 |
ip1dbg(("nce_resolv_failed: dst %s\n", |
|
2381 |
inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); |
|
2382 |
mutex_enter(&nce->nce_lock); |
|
2383 |
mp = nce->nce_qd_mp; |
|
2384 |
nce->nce_qd_mp = NULL; |
|
2385 |
mutex_exit(&nce->nce_lock); |
|
2386 |
while (mp != NULL) { |
|
2387 |
nxt_mp = mp->b_next; |
|
2388 |
mp->b_next = NULL; |
|
2389 |
mp->b_prev = NULL; |
|
2390 |
||
2391 |
first_mp = mp; |
|
2392 |
if (mp->b_datap->db_type == M_CTL) { |
|
2393 |
ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; |
|
2394 |
ASSERT(io->ipsec_out_type == IPSEC_OUT); |
|
2395 |
zoneid = io->ipsec_out_zoneid; |
|
2396 |
ASSERT(zoneid != ALL_ZONES); |
|
2397 |
mp = mp->b_cont; |
|
2398 |
} |
|
2399 |
||
2400 |
ip6h = (ip6_t *)mp->b_rptr; |
|
2401 |
if (ip6h->ip6_nxt == IPPROTO_RAW) { |
|
2402 |
ip6i_t *ip6i; |
|
2403 |
/* |
|
2404 |
* This message should have been pulled up already |
|
2405 |
* in ip_wput_v6. ip_hdr_complete_v6 assumes that |
|
2406 |
* the header is pulled up. |
|
2407 |
*/ |
|
2408 |
ip6i = (ip6i_t *)ip6h; |
|
2409 |
ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= |
|
2410 |
sizeof (ip6i_t) + IPV6_HDR_LEN); |
|
2411 |
mp->b_rptr += sizeof (ip6i_t); |
|
2412 |
} |
|
2413 |
/* |
|
2414 |
* Ignore failure since icmp_unreachable_v6 will silently |
|
2415 |
* drop packets with an unspecified source address. |
|
2416 |
*/ |
|
2417 |
(void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); |
|
2418 |
icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, |
|
2419 |
ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); |
|
2420 |
mp = nxt_mp; |
|
2421 |
} |
|
2422 |
} |
|
2423 |
||
2424 |
/* |
|
2425 |
* Called by SIOCSNDP* ioctl to add/change an nce entry |
|
2426 |
* and the corresponding attributes. |
|
2427 |
* Disallow states other than ND_REACHABLE or ND_STALE. |
|
2428 |
*/ |
|
2429 |
int |
|
2430 |
ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) |
|
2431 |
{ |
|
2432 |
sin6_t *sin6; |
|
2433 |
in6_addr_t *addr; |
|
2434 |
nce_t *nce; |
|
2435 |
int err; |
|
2436 |
uint16_t new_flags = 0; |
|
2437 |
uint16_t old_flags = 0; |
|
2438 |
int inflags = lnr->lnr_flags; |
|
2439 |
||
2440 |
if ((lnr->lnr_state_create != ND_REACHABLE) && |
|
2441 |
(lnr->lnr_state_create != ND_STALE)) |
|
2442 |
return (EINVAL); |
|
2443 |
||
2444 |
sin6 = (sin6_t *)&lnr->lnr_addr; |
|
2445 |
addr = &sin6->sin6_addr; |
|
2446 |
||
2447 |
mutex_enter(&ndp_g_lock); |
|
2448 |
/* We know it can not be mapping so just look in the hash table */ |
|
2449 |
nce = nce_lookup_addr(ill, addr); |
|
2450 |
if (nce != NULL) |
|
2451 |
new_flags = nce->nce_flags; |
|
2452 |
||
2453 |
switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { |
|
2454 |
case NDF_ISROUTER_ON: |
|
2455 |
new_flags |= NCE_F_ISROUTER; |
|
2456 |
break; |
|
2457 |
case NDF_ISROUTER_OFF: |
|
2458 |
new_flags &= ~NCE_F_ISROUTER; |
|
2459 |
break; |
|
2460 |
case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): |
|
2461 |
mutex_exit(&ndp_g_lock); |
|
2462 |
if (nce != NULL) |
|
2463 |
NCE_REFRELE(nce); |
|
2464 |
return (EINVAL); |
|
2465 |
} |
|
2466 |
||
2467 |
switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { |
|
2468 |
case NDF_ANYCAST_ON: |
|
2469 |
new_flags |= NCE_F_ANYCAST; |
|
2470 |
break; |
|
2471 |
case NDF_ANYCAST_OFF: |
|
2472 |
new_flags &= ~NCE_F_ANYCAST; |
|
2473 |
break; |
|
2474 |
case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): |
|
2475 |
mutex_exit(&ndp_g_lock); |
|
2476 |
if (nce != NULL) |
|
2477 |
NCE_REFRELE(nce); |
|
2478 |
return (EINVAL); |
|
2479 |
} |
|
2480 |
||
2481 |
switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { |
|
2482 |
case NDF_PROXY_ON: |
|
2483 |
new_flags |= NCE_F_PROXY; |
|
2484 |
break; |
|
2485 |
case NDF_PROXY_OFF: |
|
2486 |
new_flags &= ~NCE_F_PROXY; |
|
2487 |
break; |
|
2488 |
case (NDF_PROXY_OFF|NDF_PROXY_ON): |
|
2489 |
mutex_exit(&ndp_g_lock); |
|
2490 |
if (nce != NULL) |
|
2491 |
NCE_REFRELE(nce); |
|
2492 |
return (EINVAL); |
|
2493 |
} |
|
2494 |
||
2495 |
if (nce == NULL) { |
|
2496 |
err = ndp_add(ill, |
|
2497 |
(uchar_t *)lnr->lnr_hdw_addr, |
|
2498 |
addr, |
|
2499 |
&ipv6_all_ones, |
|
2500 |
&ipv6_all_zeros, |
|
2501 |
0, |
|
2502 |
new_flags, |
|
2503 |
lnr->lnr_state_create, |
|
2504 |
&nce); |
|
2505 |
if (err != 0) { |
|
2506 |
mutex_exit(&ndp_g_lock); |
|
2507 |
ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); |
|
2508 |
return (err); |
|
2509 |
} |
|
2510 |
} |
|
2511 |
old_flags = nce->nce_flags; |
|
2512 |
if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { |
|
2513 |
/* |
|
2514 |
* Router turned to host, delete all ires. |
|
2515 |
* XXX Just delete the entry, but we need to add too. |
|
2516 |
*/ |
|
2517 |
nce->nce_flags &= ~NCE_F_ISROUTER; |
|
2518 |
mutex_exit(&ndp_g_lock); |
|
2519 |
ndp_delete(nce); |
|
2520 |
NCE_REFRELE(nce); |
|
2521 |
return (0); |
|
2522 |
} |
|
2523 |
mutex_exit(&ndp_g_lock); |
|
2524 |
||
2525 |
mutex_enter(&nce->nce_lock); |
|
2526 |
nce->nce_flags = new_flags; |
|
2527 |
mutex_exit(&nce->nce_lock); |
|
2528 |
/* |
|
2529 |
* Note that we ignore the state at this point, which |
|
2530 |
* should be either STALE or REACHABLE. Instead we let |
|
2531 |
* the link layer address passed in to determine the state |
|
2532 |
* much like incoming packets. |
|
2533 |
*/ |
|
2534 |
ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); |
|
2535 |
NCE_REFRELE(nce); |
|
2536 |
return (0); |
|
2537 |
} |
|
2538 |
||
2539 |
/* |
|
2540 |
* If the device driver supports it, we make nce_fp_mp to have |
|
2541 |
* an M_DATA prepend. Otherwise nce_fp_mp will be null. |
|
2542 |
* The caller insures there is hold on nce for this function. |
|
2543 |
* Note that since ill_fastpath_probe() copies the mblk there is |
|
2544 |
* no need for the hold beyond this function. |
|
2545 |
*/ |
|
2546 |
static void |
|
2547 |
nce_fastpath(nce_t *nce) |
|
2548 |
{ |
|
2549 |
ill_t *ill = nce->nce_ill; |
|
2550 |
int res; |
|
2551 |
||
2552 |
ASSERT(ill != NULL); |
|
2553 |
if (nce->nce_fp_mp != NULL) { |
|
2554 |
/* Already contains fastpath info */ |
|
2555 |
return; |
|
2556 |
} |
|
2557 |
if (nce->nce_res_mp != NULL) { |
|
2558 |
nce_fastpath_list_add(nce); |
|
2559 |
res = ill_fastpath_probe(ill, nce->nce_res_mp); |
|
2560 |
/* |
|
2561 |
* EAGAIN is an indication of a transient error |
|
2562 |
* i.e. allocation failure etc. leave the nce in the list it |
|
2563 |
* will be updated when another probe happens for another ire |
|
2564 |
* if not it will be taken out of the list when the ire is |
|
2565 |
* deleted. |
|
2566 |
*/ |
|
2567 |
||
2568 |
if (res != 0 && res != EAGAIN) |
|
2569 |
nce_fastpath_list_delete(nce); |
|
2570 |
} |
|
2571 |
} |
|
2572 |
||
2573 |
/* |
|
2574 |
* Drain the list of nce's waiting for fastpath response. |
|
2575 |
*/ |
|
2576 |
void |
|
2577 |
nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), |
|
2578 |
void *arg) |
|
2579 |
{ |
|
2580 |
||
2581 |
nce_t *next_nce; |
|
2582 |
nce_t *current_nce; |
|
2583 |
nce_t *first_nce; |
|
2584 |
nce_t *prev_nce = NULL; |
|
2585 |
||
2586 |
ASSERT(ill != NULL); |
|
2587 |
||
2588 |
mutex_enter(&ill->ill_lock); |
|
2589 |
first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; |
|
2590 |
while (current_nce != (nce_t *)&ill->ill_fastpath_list) { |
|
2591 |
next_nce = current_nce->nce_fastpath; |
|
2592 |
/* |
|
2593 |
* Take it off the list if we're flushing, or if the callback |
|
2594 |
* routine tells us to do so. Otherwise, leave the nce in the |
|
2595 |
* fastpath list to handle any pending response from the lower |
|
2596 |
* layer. We can't drain the list when the callback routine |
|
2597 |
* comparison failed, because the response is asynchronous in |
|
2598 |
* nature, and may not arrive in the same order as the list |
|
2599 |
* insertion. |
|
2600 |
*/ |
|
2601 |
if (func == NULL || func(current_nce, arg)) { |
|
2602 |
current_nce->nce_fastpath = NULL; |
|
2603 |
if (current_nce == first_nce) |
|
2604 |
ill->ill_fastpath_list = first_nce = next_nce; |
|
2605 |
else |
|
2606 |
prev_nce->nce_fastpath = next_nce; |
|
2607 |
} else { |
|
2608 |
/* previous element that is still in the list */ |
|
2609 |
prev_nce = current_nce; |
|
2610 |
} |
|
2611 |
current_nce = next_nce; |
|
2612 |
} |
|
2613 |
mutex_exit(&ill->ill_lock); |
|
2614 |
} |
|
2615 |
||
2616 |
/* |
|
2617 |
* Add nce to the nce fastpath list. |
|
2618 |
*/ |
|
2619 |
void |
|
2620 |
nce_fastpath_list_add(nce_t *nce) |
|
2621 |
{ |
|
2622 |
ill_t *ill; |
|
2623 |
||
2624 |
ill = nce->nce_ill; |
|
2625 |
ASSERT(ill != NULL); |
|
2626 |
||
2627 |
mutex_enter(&ill->ill_lock); |
|
2628 |
mutex_enter(&nce->nce_lock); |
|
2629 |
||
2630 |
/* |
|
2631 |
* if nce has not been deleted and |
|
2632 |
* is not already in the list add it. |
|
2633 |
*/ |
|
2634 |
if (!(nce->nce_flags & NCE_F_CONDEMNED) && |
|
2635 |
(nce->nce_fastpath == NULL)) { |
|
2636 |
nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; |
|
2637 |
ill->ill_fastpath_list = nce; |
|
2638 |
} |
|
2639 |
||
2640 |
mutex_exit(&nce->nce_lock); |
|
2641 |
mutex_exit(&ill->ill_lock); |
|
2642 |
} |
|
2643 |
||
2644 |
/* |
|
2645 |
* remove nce from the nce fastpath list. |
|
2646 |
*/ |
|
2647 |
void |
|
2648 |
nce_fastpath_list_delete(nce_t *nce) |
|
2649 |
{ |
|
2650 |
nce_t *nce_ptr; |
|
2651 |
||
2652 |
ill_t *ill; |
|
2653 |
||
2654 |
ill = nce->nce_ill; |
|
2655 |
ASSERT(ill != NULL); |
|
2656 |
||
2657 |
mutex_enter(&ill->ill_lock); |
|
2658 |
if (nce->nce_fastpath == NULL) |
|
2659 |
goto done; |
|
2660 |
||
2661 |
ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); |
|
2662 |
||
2663 |
if (ill->ill_fastpath_list == nce) { |
|
2664 |
ill->ill_fastpath_list = nce->nce_fastpath; |
|
2665 |
} else { |
|
2666 |
nce_ptr = ill->ill_fastpath_list; |
|
2667 |
while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { |
|
2668 |
if (nce_ptr->nce_fastpath == nce) { |
|
2669 |
nce_ptr->nce_fastpath = nce->nce_fastpath; |
|
2670 |
break; |
|
2671 |
} |
|
2672 |
nce_ptr = nce_ptr->nce_fastpath; |
|
2673 |
} |
|
2674 |
} |
|
2675 |
||
2676 |
nce->nce_fastpath = NULL; |
|
2677 |
done: |
|
2678 |
mutex_exit(&ill->ill_lock); |
|
2679 |
} |
|
2680 |
||
2681 |
/* |
|
2682 |
* Update all NCE's that are not in fastpath mode and |
|
2683 |
* have an nce_fp_mp that matches mp. mp->b_cont contains |
|
2684 |
* the fastpath header. |
|
2685 |
* |
|
2686 |
* Returns TRUE if entry should be dequeued, or FALSE otherwise. |
|
2687 |
*/ |
|
2688 |
boolean_t |
|
2689 |
ndp_fastpath_update(nce_t *nce, void *arg) |
|
2690 |
{ |
|
2691 |
mblk_t *mp, *fp_mp; |
|
2692 |
uchar_t *mp_rptr, *ud_mp_rptr; |
|
2693 |
mblk_t *ud_mp = nce->nce_res_mp; |
|
2694 |
ptrdiff_t cmplen; |
|
2695 |
||
2696 |
if (nce->nce_flags & NCE_F_MAPPING) |
|
2697 |
return (B_TRUE); |
|
2698 |
if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) |
|
2699 |
return (B_TRUE); |
|
2700 |
||
2701 |
ip2dbg(("ndp_fastpath_update: trying\n")); |
|
2702 |
mp = (mblk_t *)arg; |
|
2703 |
mp_rptr = mp->b_rptr; |
|
2704 |
cmplen = mp->b_wptr - mp_rptr; |
|
2705 |
ASSERT(cmplen >= 0); |
|
2706 |
ud_mp_rptr = ud_mp->b_rptr; |
|
2707 |
/* |
|
2708 |
* The nce is locked here to prevent any other threads |
|
2709 |
* from accessing and changing nce_res_mp when the IPv6 address |
|
2710 |
* becomes resolved to an lla while we're in the middle |
|
2711 |
* of looking at and comparing the hardware address (lla). |
|
2712 |
* It is also locked to prevent multiple threads in nce_fastpath_update |
|
2713 |
* from examining nce_res_mp atthe same time. |
|
2714 |
*/ |
|
2715 |
mutex_enter(&nce->nce_lock); |
|
2716 |
if (ud_mp->b_wptr - ud_mp_rptr != cmplen || |
|
2717 |
bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { |
|
2718 |
mutex_exit(&nce->nce_lock); |
|
2719 |
/* |
|
2720 |
* Don't take the ire off the fastpath list yet, |
|
2721 |
* since the response may come later. |
|
2722 |
*/ |
|
2723 |
return (B_FALSE); |
|
2724 |
} |
|
2725 |
/* Matched - install mp as the fastpath mp */ |
|
2726 |
ip1dbg(("ndp_fastpath_update: match\n")); |
|
2727 |
fp_mp = dupb(mp->b_cont); |
|
2728 |
if (fp_mp != NULL) { |
|
2729 |
nce->nce_fp_mp = fp_mp; |
|
2730 |
} |
|
2731 |
mutex_exit(&nce->nce_lock); |
|
2732 |
return (B_TRUE); |
|
2733 |
} |
|
2734 |
||
2735 |
/* |
|
2736 |
* This function handles the DL_NOTE_FASTPATH_FLUSH notification from |
|
2737 |
* driver. Note that it assumes IP is exclusive... |
|
2738 |
*/ |
|
2739 |
/* ARGSUSED */ |
|
2740 |
void |
|
2741 |
ndp_fastpath_flush(nce_t *nce, char *arg) |
|
2742 |
{ |
|
2743 |
if (nce->nce_flags & NCE_F_MAPPING) |
|
2744 |
return; |
|
2745 |
/* No fastpath info? */ |
|
2746 |
if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) |
|
2747 |
return; |
|
2748 |
||
2749 |
/* Just delete the NCE... */ |
|
2750 |
ndp_delete(nce); |
|
2751 |
} |
|
2752 |
||
2753 |
/* |
|
2754 |
* Return a pointer to a given option in the packet. |
|
2755 |
* Assumes that option part of the packet have already been validated. |
|
2756 |
*/ |
|
2757 |
nd_opt_hdr_t * |
|
2758 |
ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) |
|
2759 |
{ |
|
2760 |
while (optlen > 0) { |
|
2761 |
if (opt->nd_opt_type == opt_type) |
|
2762 |
return (opt); |
|
2763 |
optlen -= 8 * opt->nd_opt_len; |
|
2764 |
opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); |
|
2765 |
} |
|
2766 |
return (NULL); |
|
2767 |
} |
|
2768 |
||
2769 |
/* |
|
2770 |
* Verify all option lengths present are > 0, also check to see |
|
2771 |
* if the option lengths and packet length are consistent. |
|
2772 |
*/ |
|
2773 |
boolean_t |
|
2774 |
ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) |
|
2775 |
{ |
|
2776 |
ASSERT(opt != NULL); |
|
2777 |
while (optlen > 0) { |
|
2778 |
if (opt->nd_opt_len == 0) |
|
2779 |
return (B_FALSE); |
|
2780 |
optlen -= 8 * opt->nd_opt_len; |
|
2781 |
if (optlen < 0) |
|
2782 |
return (B_FALSE); |
|
2783 |
opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); |
|
2784 |
} |
|
2785 |
return (B_TRUE); |
|
2786 |
} |
|
2787 |
||
2788 |
/* |
|
2789 |
* ndp_walk function. |
|
2790 |
* Free a fraction of the NCE cache entries. |
|
2791 |
* A fraction of zero means to not free any in that category. |
|
2792 |
*/ |
|
2793 |
void |
|
2794 |
ndp_cache_reclaim(nce_t *nce, char *arg) |
|
2795 |
{ |
|
2796 |
nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; |
|
2797 |
uint_t rand; |
|
2798 |
||
2799 |
if (nce->nce_flags & NCE_F_PERMANENT) |
|
2800 |
return; |
|
2801 |
||
2802 |
rand = (uint_t)lbolt + |
|
2803 |
NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); |
|
2804 |
if (ncr->ncr_host != 0 && |
|
2805 |
(rand/ncr->ncr_host)*ncr->ncr_host == rand) { |
|
2806 |
ndp_delete(nce); |
|
2807 |
return; |
|
2808 |
} |
|
2809 |
} |
|
2810 |
||
2811 |
/* |
|
2812 |
* ndp_walk function. |
|
2813 |
* Count the number of NCEs that can be deleted. |
|
2814 |
* These would be hosts but not routers. |
|
2815 |
*/ |
|
2816 |
void |
|
2817 |
ndp_cache_count(nce_t *nce, char *arg) |
|
2818 |
{ |
|
2819 |
ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; |
|
2820 |
||
2821 |
if (nce->nce_flags & NCE_F_PERMANENT) |
|
2822 |
return; |
|
2823 |
||
2824 |
ncc->ncc_total++; |
|
2825 |
if (!(nce->nce_flags & NCE_F_ISROUTER)) |
|
2826 |
ncc->ncc_host++; |
|
2827 |
} |
|
2828 |
||
2829 |
#ifdef NCE_DEBUG |
|
2830 |
th_trace_t * |
|
2831 |
th_trace_nce_lookup(nce_t *nce) |
|
2832 |
{ |
|
2833 |
int bucket_id; |
|
2834 |
th_trace_t *th_trace; |
|
2835 |
||
2836 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
2837 |
||
2838 |
bucket_id = IP_TR_HASH(curthread); |
|
2839 |
ASSERT(bucket_id < IP_TR_HASH_MAX); |
|
2840 |
||
2841 |
for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; |
|
2842 |
th_trace = th_trace->th_next) { |
|
2843 |
if (th_trace->th_id == curthread) |
|
2844 |
return (th_trace); |
|
2845 |
} |
|
2846 |
return (NULL); |
|
2847 |
} |
|
2848 |
||
2849 |
void |
|
2850 |
nce_trace_ref(nce_t *nce) |
|
2851 |
{ |
|
2852 |
int bucket_id; |
|
2853 |
th_trace_t *th_trace; |
|
2854 |
||
2855 |
/* |
|
2856 |
* Attempt to locate the trace buffer for the curthread. |
|
2857 |
* If it does not exist, then allocate a new trace buffer |
|
2858 |
* and link it in list of trace bufs for this ipif, at the head |
|
2859 |
*/ |
|
2860 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
2861 |
||
2862 |
if (nce->nce_trace_disable == B_TRUE) |
|
2863 |
return; |
|
2864 |
||
2865 |
th_trace = th_trace_nce_lookup(nce); |
|
2866 |
if (th_trace == NULL) { |
|
2867 |
bucket_id = IP_TR_HASH(curthread); |
|
2868 |
th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), |
|
2869 |
KM_NOSLEEP); |
|
2870 |
if (th_trace == NULL) { |
|
2871 |
nce->nce_trace_disable = B_TRUE; |
|
2872 |
nce_trace_inactive(nce); |
|
2873 |
return; |
|
2874 |
} |
|
2875 |
th_trace->th_id = curthread; |
|
2876 |
th_trace->th_next = nce->nce_trace[bucket_id]; |
|
2877 |
th_trace->th_prev = &nce->nce_trace[bucket_id]; |
|
2878 |
if (th_trace->th_next != NULL) |
|
2879 |
th_trace->th_next->th_prev = &th_trace->th_next; |
|
2880 |
nce->nce_trace[bucket_id] = th_trace; |
|
2881 |
} |
|
2882 |
ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); |
|
2883 |
th_trace->th_refcnt++; |
|
2884 |
th_trace_rrecord(th_trace); |
|
2885 |
} |
|
2886 |
||
2887 |
void |
|
2888 |
nce_untrace_ref(nce_t *nce) |
|
2889 |
{ |
|
2890 |
th_trace_t *th_trace; |
|
2891 |
||
2892 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
2893 |
||
2894 |
if (nce->nce_trace_disable == B_TRUE) |
|
2895 |
return; |
|
2896 |
||
2897 |
th_trace = th_trace_nce_lookup(nce); |
|
2898 |
ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); |
|
2899 |
||
2900 |
th_trace_rrecord(th_trace); |
|
2901 |
th_trace->th_refcnt--; |
|
2902 |
} |
|
2903 |
||
2904 |
void |
|
2905 |
nce_trace_inactive(nce_t *nce) |
|
2906 |
{ |
|
2907 |
th_trace_t *th_trace; |
|
2908 |
int i; |
|
2909 |
||
2910 |
ASSERT(MUTEX_HELD(&nce->nce_lock)); |
|
2911 |
||
2912 |
for (i = 0; i < IP_TR_HASH_MAX; i++) { |
|
2913 |
while (nce->nce_trace[i] != NULL) { |
|
2914 |
th_trace = nce->nce_trace[i]; |
|
2915 |
||
2916 |
/* unlink th_trace and free it */ |
|
2917 |
nce->nce_trace[i] = th_trace->th_next; |
|
2918 |
if (th_trace->th_next != NULL) |
|
2919 |
th_trace->th_next->th_prev = |
|
2920 |
&nce->nce_trace[i]; |
|
2921 |
||
2922 |
th_trace->th_next = NULL; |
|
2923 |
th_trace->th_prev = NULL; |
|
2924 |
kmem_free(th_trace, sizeof (th_trace_t)); |
|
2925 |
} |
|
2926 |
} |
|
2927 |
||
2928 |
} |
|
2929 |
||
2930 |
/* ARGSUSED */ |
|
2931 |
int |
|
2932 |
nce_thread_exit(nce_t *nce, caddr_t arg) |
|
2933 |
{ |
|
2934 |
th_trace_t *th_trace; |
|
2935 |
||
2936 |
mutex_enter(&nce->nce_lock); |
|
2937 |
th_trace = th_trace_nce_lookup(nce); |
|
2938 |
||
2939 |
if (th_trace == NULL) { |
|
2940 |
mutex_exit(&nce->nce_lock); |
|
2941 |
return (0); |
|
2942 |
} |
|
2943 |
||
2944 |
ASSERT(th_trace->th_refcnt == 0); |
|
2945 |
||
2946 |
/* unlink th_trace and free it */ |
|
2947 |
*th_trace->th_prev = th_trace->th_next; |
|
2948 |
if (th_trace->th_next != NULL) |
|
2949 |
th_trace->th_next->th_prev = th_trace->th_prev; |
|
2950 |
th_trace->th_next = NULL; |
|
2951 |
th_trace->th_prev = NULL; |
|
2952 |
kmem_free(th_trace, sizeof (th_trace_t)); |
|
2953 |
mutex_exit(&nce->nce_lock); |
|
2954 |
return (0); |
|
2955 |
} |
|
2956 |
#endif |