author | dp |
Thu, 22 Jun 2006 14:42:46 -0700 | |
changeset 2267 | c5d9a656170f |
parent 2110 | 31cba59b38be |
child 2677 | 212d61b14a8b |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1676 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
390 | 21 |
|
0 | 22 |
/* |
1409
c25d6f2622c9
6366674 zones service common name could be more descriptive
dp
parents:
1166
diff
changeset
|
23 |
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
0 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
27 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
28 |
||
29 |
/* |
|
30 |
* Zones |
|
31 |
* |
|
32 |
* A zone is a named collection of processes, namespace constraints, |
|
33 |
* and other system resources which comprise a secure and manageable |
|
34 |
* application containment facility. |
|
35 |
* |
|
36 |
* Zones (represented by the reference counted zone_t) are tracked in |
|
37 |
* the kernel in the zonehash. Elsewhere in the kernel, Zone IDs |
|
38 |
* (zoneid_t) are used to track zone association. Zone IDs are |
|
39 |
* dynamically generated when the zone is created; if a persistent |
|
40 |
* identifier is needed (core files, accounting logs, audit trail, |
|
41 |
* etc.), the zone name should be used. |
|
42 |
* |
|
43 |
* |
|
44 |
* Global Zone: |
|
45 |
* |
|
46 |
* The global zone (zoneid 0) is automatically associated with all |
|
47 |
* system resources that have not been bound to a user-created zone. |
|
48 |
* This means that even systems where zones are not in active use |
|
49 |
* have a global zone, and all processes, mounts, etc. are |
|
50 |
* associated with that zone. The global zone is generally |
|
51 |
* unconstrained in terms of privileges and access, though the usual |
|
52 |
* credential and privilege based restrictions apply. |
|
53 |
* |
|
54 |
* |
|
55 |
* Zone States: |
|
56 |
* |
|
57 |
* The states in which a zone may be in and the transitions are as |
|
58 |
* follows: |
|
59 |
* |
|
60 |
* ZONE_IS_UNINITIALIZED: primordial state for a zone. The partially |
|
61 |
* initialized zone is added to the list of active zones on the system but |
|
62 |
* isn't accessible. |
|
63 |
* |
|
64 |
* ZONE_IS_READY: zsched (the kernel dummy process for a zone) is |
|
65 |
* ready. The zone is made visible after the ZSD constructor callbacks are |
|
66 |
* executed. A zone remains in this state until it transitions into |
|
67 |
* the ZONE_IS_BOOTING state as a result of a call to zone_boot(). |
|
68 |
* |
|
69 |
* ZONE_IS_BOOTING: in this shortlived-state, zsched attempts to start |
|
70 |
* init. Should that fail, the zone proceeds to the ZONE_IS_SHUTTING_DOWN |
|
71 |
* state. |
|
72 |
* |
|
73 |
* ZONE_IS_RUNNING: The zone is open for business: zsched has |
|
74 |
* successfully started init. A zone remains in this state until |
|
75 |
* zone_shutdown() is called. |
|
76 |
* |
|
77 |
* ZONE_IS_SHUTTING_DOWN: zone_shutdown() has been called, the system is |
|
78 |
* killing all processes running in the zone. The zone remains |
|
79 |
* in this state until there are no more user processes running in the zone. |
|
80 |
* zone_create(), zone_enter(), and zone_destroy() on this zone will fail. |
|
81 |
* Since zone_shutdown() is restartable, it may be called successfully |
|
82 |
* multiple times for the same zone_t. Setting of the zone's state to |
|
83 |
* ZONE_IS_SHUTTING_DOWN is synchronized with mounts, so VOP_MOUNT() may check |
|
84 |
* the zone's status without worrying about it being a moving target. |
|
85 |
* |
|
86 |
* ZONE_IS_EMPTY: zone_shutdown() has been called, and there |
|
87 |
* are no more user processes in the zone. The zone remains in this |
|
88 |
* state until there are no more kernel threads associated with the |
|
89 |
* zone. zone_create(), zone_enter(), and zone_destroy() on this zone will |
|
90 |
* fail. |
|
91 |
* |
|
92 |
* ZONE_IS_DOWN: All kernel threads doing work on behalf of the zone |
|
93 |
* have exited. zone_shutdown() returns. Henceforth it is not possible to |
|
94 |
* join the zone or create kernel threads therein. |
|
95 |
* |
|
96 |
* ZONE_IS_DYING: zone_destroy() has been called on the zone; zone |
|
97 |
* remains in this state until zsched exits. Calls to zone_find_by_*() |
|
98 |
* return NULL from now on. |
|
99 |
* |
|
100 |
* ZONE_IS_DEAD: zsched has exited (zone_ntasks == 0). There are no |
|
101 |
* processes or threads doing work on behalf of the zone. The zone is |
|
102 |
* removed from the list of active zones. zone_destroy() returns, and |
|
103 |
* the zone can be recreated. |
|
104 |
* |
|
105 |
* ZONE_IS_FREE (internal state): zone_ref goes to 0, ZSD destructor |
|
106 |
* callbacks are executed, and all memory associated with the zone is |
|
107 |
* freed. |
|
108 |
* |
|
109 |
* Threads can wait for the zone to enter a requested state by using |
|
110 |
* zone_status_wait() or zone_status_timedwait() with the desired |
|
111 |
* state passed in as an argument. Zone state transitions are |
|
112 |
* uni-directional; it is not possible to move back to an earlier state. |
|
113 |
* |
|
114 |
* |
|
115 |
* Zone-Specific Data: |
|
116 |
* |
|
117 |
* Subsystems needing to maintain zone-specific data can store that |
|
118 |
* data using the ZSD mechanism. This provides a zone-specific data |
|
119 |
* store, similar to thread-specific data (see pthread_getspecific(3C) |
|
120 |
* or the TSD code in uts/common/disp/thread.c. Also, ZSD can be used |
|
121 |
* to register callbacks to be invoked when a zone is created, shut |
|
122 |
* down, or destroyed. This can be used to initialize zone-specific |
|
123 |
* data for new zones and to clean up when zones go away. |
|
124 |
* |
|
125 |
* |
|
126 |
* Data Structures: |
|
127 |
* |
|
128 |
* The per-zone structure (zone_t) is reference counted, and freed |
|
129 |
* when all references are released. zone_hold and zone_rele can be |
|
130 |
* used to adjust the reference count. In addition, reference counts |
|
131 |
* associated with the cred_t structure are tracked separately using |
|
132 |
* zone_cred_hold and zone_cred_rele. |
|
133 |
* |
|
134 |
* Pointers to active zone_t's are stored in two hash tables; one |
|
135 |
* for searching by id, the other for searching by name. Lookups |
|
136 |
* can be performed on either basis, using zone_find_by_id and |
|
137 |
* zone_find_by_name. Both return zone_t pointers with the zone |
|
138 |
* held, so zone_rele should be called when the pointer is no longer |
|
139 |
* needed. Zones can also be searched by path; zone_find_by_path |
|
140 |
* returns the zone with which a path name is associated (global |
|
141 |
* zone if the path is not within some other zone's file system |
|
142 |
* hierarchy). This currently requires iterating through each zone, |
|
143 |
* so it is slower than an id or name search via a hash table. |
|
144 |
* |
|
145 |
* |
|
146 |
* Locking: |
|
147 |
* |
|
148 |
* zonehash_lock: This is a top-level global lock used to protect the |
|
149 |
* zone hash tables and lists. Zones cannot be created or destroyed |
|
150 |
* while this lock is held. |
|
151 |
* zone_status_lock: This is a global lock protecting zone state. |
|
152 |
* Zones cannot change state while this lock is held. It also |
|
153 |
* protects the list of kernel threads associated with a zone. |
|
154 |
* zone_lock: This is a per-zone lock used to protect several fields of |
|
155 |
* the zone_t (see <sys/zone.h> for details). In addition, holding |
|
156 |
* this lock means that the zone cannot go away. |
|
157 |
* zsd_key_lock: This is a global lock protecting the key state for ZSD. |
|
158 |
* zone_deathrow_lock: This is a global lock protecting the "deathrow" |
|
159 |
* list (a list of zones in the ZONE_IS_DEAD state). |
|
160 |
* |
|
161 |
* Ordering requirements: |
|
162 |
* pool_lock --> cpu_lock --> zonehash_lock --> zone_status_lock --> |
|
163 |
* zone_lock --> zsd_key_lock --> pidlock --> p_lock |
|
164 |
* |
|
165 |
* Blocking memory allocations are permitted while holding any of the |
|
166 |
* zone locks. |
|
167 |
* |
|
168 |
* |
|
169 |
* System Call Interface: |
|
170 |
* |
|
171 |
* The zone subsystem can be managed and queried from user level with |
|
172 |
* the following system calls (all subcodes of the primary "zone" |
|
173 |
* system call): |
|
174 |
* - zone_create: creates a zone with selected attributes (name, |
|
789 | 175 |
* root path, privileges, resource controls, ZFS datasets) |
0 | 176 |
* - zone_enter: allows the current process to enter a zone |
177 |
* - zone_getattr: reports attributes of a zone |
|
2267 | 178 |
* - zone_setattr: set attributes of a zone |
179 |
* - zone_boot: set 'init' running for the zone |
|
0 | 180 |
* - zone_list: lists all zones active in the system |
181 |
* - zone_lookup: looks up zone id based on name |
|
182 |
* - zone_shutdown: initiates shutdown process (see states above) |
|
183 |
* - zone_destroy: completes shutdown process (see states above) |
|
184 |
* |
|
185 |
*/ |
|
186 |
||
187 |
#include <sys/priv_impl.h> |
|
188 |
#include <sys/cred.h> |
|
189 |
#include <c2/audit.h> |
|
190 |
#include <sys/debug.h> |
|
191 |
#include <sys/file.h> |
|
192 |
#include <sys/kmem.h> |
|
193 |
#include <sys/mutex.h> |
|
1676 | 194 |
#include <sys/note.h> |
0 | 195 |
#include <sys/pathname.h> |
196 |
#include <sys/proc.h> |
|
197 |
#include <sys/project.h> |
|
1166 | 198 |
#include <sys/sysevent.h> |
0 | 199 |
#include <sys/task.h> |
200 |
#include <sys/systm.h> |
|
201 |
#include <sys/types.h> |
|
202 |
#include <sys/utsname.h> |
|
203 |
#include <sys/vnode.h> |
|
204 |
#include <sys/vfs.h> |
|
205 |
#include <sys/systeminfo.h> |
|
206 |
#include <sys/policy.h> |
|
207 |
#include <sys/cred_impl.h> |
|
208 |
#include <sys/contract_impl.h> |
|
209 |
#include <sys/contract/process_impl.h> |
|
210 |
#include <sys/class.h> |
|
211 |
#include <sys/pool.h> |
|
212 |
#include <sys/pool_pset.h> |
|
213 |
#include <sys/pset.h> |
|
214 |
#include <sys/sysmacros.h> |
|
215 |
#include <sys/callb.h> |
|
216 |
#include <sys/vmparam.h> |
|
217 |
#include <sys/corectl.h> |
|
218 |
||
219 |
#include <sys/door.h> |
|
220 |
#include <sys/cpuvar.h> |
|
221 |
||
222 |
#include <sys/uadmin.h> |
|
223 |
#include <sys/session.h> |
|
224 |
#include <sys/cmn_err.h> |
|
225 |
#include <sys/modhash.h> |
|
2267 | 226 |
#include <sys/sunddi.h> |
0 | 227 |
#include <sys/nvpair.h> |
228 |
#include <sys/rctl.h> |
|
229 |
#include <sys/fss.h> |
|
230 |
#include <sys/zone.h> |
|
1676 | 231 |
#include <sys/tsol/label.h> |
0 | 232 |
|
233 |
/*
 * cv used to signal that all references to the zone have been released.  This
 * needs to be global since there may be multiple waiters, and the first to
 * wake up will free the zone_t, hence we cannot use zone->zone_cv.
 */
static kcondvar_t zone_destroy_cv;
/*
 * Lock used to serialize access to zone_cv.  This could have been per-zone,
 * but then we'd need another lock for zone_destroy_cv, and why bother?
 */
static kmutex_t zone_status_lock;

/*
 * ZSD-related global variables.
 */
static kmutex_t zsd_key_lock;	/* protects the following two */
/*
 * The next caller of zone_key_create() will be assigned a key of ++zsd_keyval.
 */
static zone_key_t zsd_keyval = 0;
/*
 * Global list of registered keys.  We use this when a new zone is created.
 */
static list_t zsd_registered_keys;

/* Number of buckets used for the zone hash tables below. */
int zone_hash_size = 256;
/* Active zones, hashed by name, by id, and by label. */
static mod_hash_t *zonehashbyname, *zonehashbyid, *zonehashbylabel;
static kmutex_t zonehash_lock;
/* Count of active zones; protected by zonehash_lock. */
static uint_t zonecount;
/* ID space from which dynamic zone IDs are allocated. */
static id_space_t *zoneid_space;

/*
 * The global zone (aka zone0) is the all-seeing, all-knowing zone in which the
 * kernel proper runs, and which manages all other zones.
 *
 * Although not declared as static, the variable "zone0" should not be used
 * except for by code that needs to reference the global zone early on in boot,
 * before it is fully initialized.  All other consumers should use
 * 'global_zone'.
 */
zone_t zone0;
zone_t *global_zone = NULL;	/* Set when the global zone is initialized */

/*
 * List of active zones, protected by zonehash_lock.
 */
static list_t zone_active;

/*
 * List of destroyed zones that still have outstanding cred references.
 * Used for debugging.  Uses a separate lock to avoid lock ordering
 * problems in zone_free.
 */
static list_t zone_deathrow;
static kmutex_t zone_deathrow_lock;

/* number of zones is limited by virtual interface limit in IP */
uint_t maxzones = 8192;
|
291 |
||
1166 | 292 |
/* Event channel used to send zone state change notifications */
evchan_t *zone_event_chan;

/*
 * This table holds the mapping from kernel zone states to
 * states visible in the state notification API.
 * The idea is that we only expose "obvious" states and
 * do not expose states which are just implementation details.
 * Indexed by the kernel zone_status_t value (see the per-entry
 * comments for which kernel state each slot corresponds to).
 */
const char *zone_status_table[] = {
	ZONE_EVENT_UNINITIALIZED,	/* uninitialized */
	ZONE_EVENT_READY,		/* ready */
	ZONE_EVENT_READY,		/* booting */
	ZONE_EVENT_RUNNING,		/* running */
	ZONE_EVENT_SHUTTING_DOWN,	/* shutting_down */
	ZONE_EVENT_SHUTTING_DOWN,	/* empty */
	ZONE_EVENT_SHUTTING_DOWN,	/* down */
	ZONE_EVENT_SHUTTING_DOWN,	/* dying */
	ZONE_EVENT_UNINITIALIZED,	/* dead */
};
|
312 |
||
0 | 313 |
/*
 * This isn't static so lint doesn't complain.
 */
rctl_hndl_t rc_zone_cpu_shares;
rctl_hndl_t rc_zone_nlwps;
/*
 * Synchronization primitives used to synchronize between mounts and zone
 * creation/destruction.
 */
static int mounts_in_progress;	/* >0: mounts active; <0: blockers active */
static kcondvar_t mount_cv;
static kmutex_t mount_lock;

/* Default path of the program started as a zone's "init" process. */
const char * const zone_default_initname = "/sbin/init";
static char * const zone_prefix = "/zone/";

static int zone_shutdown(zoneid_t zoneid);

/*
 * Bump this number when you alter the zone syscall interfaces; this is
 * because we need to have support for previous API versions in libc
 * to support patching; libc calls into the kernel to determine this number.
 *
 * Version 1 of the API is the version originally shipped with Solaris 10
 * Version 2 alters the zone_create system call in order to support more
 *     arguments by moving the args into a structure; and to do better
 *     error reporting when zone_create() fails.
 * Version 3 alters the zone_create system call in order to support the
 *     import of ZFS datasets to zones.
 * Version 4 alters the zone_create system call in order to support
 *     Trusted Extensions.
 * Version 5 alters the zone_boot system call, and converts its old
 *     bootargs parameter to be set by the zone_setattr API instead.
 */
static const int ZONE_SYSCALL_API_VERSION = 5;
813 | 348 |
|
349 |
/* |
|
0 | 350 |
* Certain filesystems (such as NFS and autofs) need to know which zone |
351 |
* the mount is being placed in. Because of this, we need to be able to |
|
352 |
* ensure that a zone isn't in the process of being created such that |
|
353 |
* nfs_mount() thinks it is in the global zone, while by the time it |
|
354 |
* gets added the list of mounted zones, it ends up on zoneA's mount |
|
355 |
* list. |
|
356 |
* |
|
357 |
* The following functions: block_mounts()/resume_mounts() and |
|
358 |
* mount_in_progress()/mount_completed() are used by zones and the VFS |
|
359 |
* layer (respectively) to synchronize zone creation and new mounts. |
|
360 |
* |
|
361 |
* The semantics are like a reader-reader lock such that there may |
|
362 |
* either be multiple mounts (or zone creations, if that weren't |
|
363 |
* serialized by zonehash_lock) in progress at the same time, but not |
|
364 |
* both. |
|
365 |
* |
|
366 |
* We use cv's so the user can ctrl-C out of the operation if it's |
|
367 |
* taking too long. |
|
368 |
* |
|
369 |
* The semantics are such that there is unfair bias towards the |
|
370 |
* "current" operation. This means that zone creations may starve if |
|
371 |
* there is a rapid succession of new mounts coming in to the system, or |
|
372 |
* there is a remote possibility that zones will be created at such a |
|
373 |
* rate that new mounts will not be able to proceed. |
|
374 |
*/ |
|
375 |
/*
 * Prevent new mounts from progressing to the point of calling
 * VFS_MOUNT().  If there are already mounts in this "region", wait for
 * them to complete.
 *
 * Returns 1 if mounts were successfully blocked, or 0 if the wait was
 * interrupted by a signal (cv_wait_sig() returned zero) before mounts
 * could be blocked.  On a 0 return no block is recorded, so the caller
 * must not later call resume_mounts().
 */
static int
block_mounts(void)
{
	int retval = 0;

	/*
	 * Since it may block for a long time, block_mounts() shouldn't be
	 * called with zonehash_lock held.
	 */
	ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
	mutex_enter(&mount_lock);
	while (mounts_in_progress > 0) {
		if (cv_wait_sig(&mount_cv, &mount_lock) == 0)
			goto signaled;
	}
	/*
	 * A negative value of mounts_in_progress indicates that mounts
	 * have been blocked by (-mounts_in_progress) different callers.
	 */
	mounts_in_progress--;
	retval = 1;
signaled:
	mutex_exit(&mount_lock);
	return (retval);
}
|
405 |
||
406 |
/* |
|
407 |
* The VFS layer may progress with new mounts as far as we're concerned. |
|
408 |
* Allow them to progress if we were the last obstacle. |
|
409 |
*/ |
|
410 |
static void |
|
411 |
resume_mounts(void) |
|
412 |
{ |
|
413 |
mutex_enter(&mount_lock); |
|
414 |
if (++mounts_in_progress == 0) |
|
415 |
cv_broadcast(&mount_cv); |
|
416 |
mutex_exit(&mount_lock); |
|
417 |
} |
|
418 |
||
419 |
/* |
|
420 |
* The VFS layer is busy with a mount; zones should wait until all |
|
421 |
* mounts are completed to progress. |
|
422 |
*/ |
|
423 |
void |
|
424 |
mount_in_progress(void) |
|
425 |
{ |
|
426 |
mutex_enter(&mount_lock); |
|
427 |
while (mounts_in_progress < 0) |
|
428 |
cv_wait(&mount_cv, &mount_lock); |
|
429 |
mounts_in_progress++; |
|
430 |
mutex_exit(&mount_lock); |
|
431 |
} |
|
432 |
||
433 |
/* |
|
434 |
* VFS is done with one mount; wake up any waiting block_mounts() |
|
435 |
* callers if this is the last mount. |
|
436 |
*/ |
|
437 |
void |
|
438 |
mount_completed(void) |
|
439 |
{ |
|
440 |
mutex_enter(&mount_lock); |
|
441 |
if (--mounts_in_progress == 0) |
|
442 |
cv_broadcast(&mount_cv); |
|
443 |
mutex_exit(&mount_lock); |
|
444 |
} |
|
445 |
||
446 |
/* |
|
447 |
* ZSD routines. |
|
448 |
* |
|
449 |
* Zone Specific Data (ZSD) is modeled after Thread Specific Data as |
|
450 |
* defined by the pthread_key_create() and related interfaces. |
|
451 |
* |
|
452 |
* Kernel subsystems may register one or more data items and/or |
|
453 |
* callbacks to be executed when a zone is created, shutdown, or |
|
454 |
* destroyed. |
|
455 |
* |
|
456 |
* Unlike the thread counterpart, destructor callbacks will be executed |
|
457 |
* even if the data pointer is NULL and/or there are no constructor |
|
458 |
* callbacks, so it is the responsibility of such callbacks to check for |
|
459 |
* NULL data values if necessary. |
|
460 |
* |
|
461 |
* The locking strategy and overall picture is as follows: |
|
462 |
* |
|
463 |
* When someone calls zone_key_create(), a template ZSD entry is added to the |
|
464 |
* global list "zsd_registered_keys", protected by zsd_key_lock. The |
|
465 |
* constructor callback is called immediately on all existing zones, and a |
|
466 |
* copy of the ZSD entry added to the per-zone zone_zsd list (protected by |
|
467 |
* zone_lock). As this operation requires the list of zones, the list of |
|
468 |
* registered keys, and the per-zone list of ZSD entries to remain constant |
|
469 |
* throughout the entire operation, it must grab zonehash_lock, zone_lock for |
|
470 |
* all existing zones, and zsd_key_lock, in that order. Similar locking is |
|
471 |
* needed when zone_key_delete() is called. It is thus sufficient to hold |
|
472 |
* zsd_key_lock *or* zone_lock to prevent additions to or removals from the |
|
473 |
* per-zone zone_zsd list. |
|
474 |
* |
|
475 |
* Note that this implementation does not make a copy of the ZSD entry if a |
|
476 |
* constructor callback is not provided. A zone_getspecific() on such an |
|
477 |
* uninitialized ZSD entry will return NULL. |
|
478 |
* |
|
479 |
* When new zones are created constructor callbacks for all registered ZSD |
|
480 |
* entries will be called. |
|
481 |
* |
|
482 |
* The framework does not provide any locking around zone_getspecific() and |
|
483 |
* zone_setspecific() apart from that needed for internal consistency, so |
|
484 |
* callers interested in atomic "test-and-set" semantics will need to provide |
|
485 |
* their own locking. |
|
486 |
*/ |
|
487 |
/*
 * Create a new ZSD key and register the supplied (each optional) create,
 * shutdown, and destroy callbacks for it.  The new key is returned through
 * *keyp.  If a create callback is given it is invoked immediately for every
 * existing zone, and the resulting per-zone data is recorded on each zone's
 * zone_zsd list; with no create callback, no per-zone entry is made (see
 * the ZSD block comment above).
 *
 * Locking: acquires zonehash_lock, then every active zone's zone_lock, then
 * zsd_key_lock — matching the lock ordering documented at the top of this
 * file — so the zone list, key list, and per-zone ZSD lists all stay
 * constant for the duration.
 */
void
zone_key_create(zone_key_t *keyp, void *(*create)(zoneid_t),
    void (*shutdown)(zoneid_t, void *), void (*destroy)(zoneid_t, void *))
{
	struct zsd_entry *zsdp;
	struct zsd_entry *t;
	struct zone *zone;

	/* Build the template entry that lives on zsd_registered_keys. */
	zsdp = kmem_alloc(sizeof (*zsdp), KM_SLEEP);
	zsdp->zsd_data = NULL;
	zsdp->zsd_create = create;
	zsdp->zsd_shutdown = shutdown;
	zsdp->zsd_destroy = destroy;

	mutex_enter(&zonehash_lock);	/* stop the world */
	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone))
		mutex_enter(&zone->zone_lock);	/* lock all zones */

	mutex_enter(&zsd_key_lock);
	*keyp = zsdp->zsd_key = ++zsd_keyval;
	ASSERT(zsd_keyval != 0);	/* keys must never be 0 (or wrap) */
	list_insert_tail(&zsd_registered_keys, zsdp);
	mutex_exit(&zsd_key_lock);

	if (create != NULL) {
		/* Run the constructor for every zone that already exists. */
		for (zone = list_head(&zone_active); zone != NULL;
		    zone = list_next(&zone_active, zone)) {
			t = kmem_alloc(sizeof (*t), KM_SLEEP);
			t->zsd_key = *keyp;
			t->zsd_data = (*create)(zone->zone_id);
			t->zsd_create = create;
			t->zsd_shutdown = shutdown;
			t->zsd_destroy = destroy;
			list_insert_tail(&zone->zone_zsd, t);
		}
	}
	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone))
		mutex_exit(&zone->zone_lock);
	mutex_exit(&zonehash_lock);
}
|
529 |
||
530 |
/* |
|
531 |
* Helper function to find the zsd_entry associated with the key in the |
|
532 |
* given list. |
|
533 |
*/ |
|
534 |
static struct zsd_entry * |
|
535 |
zsd_find(list_t *l, zone_key_t key) |
|
536 |
{ |
|
537 |
struct zsd_entry *zsd; |
|
538 |
||
539 |
for (zsd = list_head(l); zsd != NULL; zsd = list_next(l, zsd)) { |
|
540 |
if (zsd->zsd_key == key) { |
|
541 |
/* |
|
542 |
* Move to head of list to keep list in MRU order. |
|
543 |
*/ |
|
544 |
if (zsd != list_head(l)) { |
|
545 |
list_remove(l, zsd); |
|
546 |
list_insert_head(l, zsd); |
|
547 |
} |
|
548 |
return (zsd); |
|
549 |
} |
|
550 |
} |
|
551 |
return (NULL); |
|
552 |
} |
|
553 |
||
554 |
/*
 * Function called when a module is being unloaded, or otherwise wishes
 * to unregister its ZSD key and callbacks.
 *
 * For every active zone that has not already been destroyed, the key's
 * per-zone entry (if any) is removed and the registered shutdown and
 * destroy callbacks are invoked with that zone's data (or NULL if the
 * zone never had data for this key).
 *
 * Returns 0 on success, -1 if the key was never registered.
 */
int
zone_key_delete(zone_key_t key)
{
	struct zsd_entry *zsdp = NULL;
	zone_t *zone;

	mutex_enter(&zonehash_lock);	/* Zone create/delete waits for us */
	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone))
		mutex_enter(&zone->zone_lock);	/* lock all zones */

	mutex_enter(&zsd_key_lock);
	zsdp = zsd_find(&zsd_registered_keys, key);
	if (zsdp == NULL)
		goto notfound;
	list_remove(&zsd_registered_keys, zsdp);
	mutex_exit(&zsd_key_lock);

	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone)) {
		struct zsd_entry *del;
		void *data;

		if (!(zone->zone_flags & ZF_DESTROYED)) {
			del = zsd_find(&zone->zone_zsd, key);
			if (del != NULL) {
				data = del->zsd_data;
				/* Per-zone callbacks must match the template */
				ASSERT(del->zsd_shutdown == zsdp->zsd_shutdown);
				ASSERT(del->zsd_destroy == zsdp->zsd_destroy);
				list_remove(&zone->zone_zsd, del);
				kmem_free(del, sizeof (*del));
			} else {
				data = NULL;
			}
			if (zsdp->zsd_shutdown)
				zsdp->zsd_shutdown(zone->zone_id, data);
			if (zsdp->zsd_destroy)
				zsdp->zsd_destroy(zone->zone_id, data);
		}
		/* Drop each zone's lock as soon as we are done with it. */
		mutex_exit(&zone->zone_lock);
	}
	mutex_exit(&zonehash_lock);
	kmem_free(zsdp, sizeof (*zsdp));
	return (0);

notfound:
	/* Key was never registered: undo all the locking and fail. */
	mutex_exit(&zsd_key_lock);
	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone))
		mutex_exit(&zone->zone_lock);
	mutex_exit(&zonehash_lock);
	return (-1);
}
|
611 |
||
612 |
/*
 * ZSD counterpart of pthread_setspecific().
 *
 * Sets the data associated with 'key' for 'zone', creating a per-zone
 * ZSD entry from the registered template if the zone does not have one
 * yet.  Returns 0 on success, -1 if the key was never registered via
 * zone_key_create().
 */
int
zone_setspecific(zone_key_t key, zone_t *zone, const void *data)
{
	struct zsd_entry *t;
	struct zsd_entry *zsdp = NULL;

	mutex_enter(&zone->zone_lock);
	t = zsd_find(&zone->zone_zsd, key);
	if (t != NULL) {
		/*
		 * Replace old value with new
		 */
		t->zsd_data = (void *)data;
		mutex_exit(&zone->zone_lock);
		return (0);
	}
	/*
	 * If there was no previous value, go through the list of registered
	 * keys.
	 *
	 * We avoid grabbing zsd_key_lock until we are sure we need it; this is
	 * necessary for shutdown callbacks to be able to execute without fear
	 * of deadlock.
	 */
	mutex_enter(&zsd_key_lock);
	zsdp = zsd_find(&zsd_registered_keys, key);
	if (zsdp == NULL) {	/* Key was not registered */
		mutex_exit(&zsd_key_lock);
		mutex_exit(&zone->zone_lock);
		return (-1);
	}

	/*
	 * Add a zsd_entry to this zone, using the template we just retrieved
	 * to initialize the constructor and destructor(s).
	 */
	t = kmem_alloc(sizeof (*t), KM_SLEEP);
	t->zsd_key = key;
	t->zsd_data = (void *)data;
	t->zsd_create = zsdp->zsd_create;
	t->zsd_shutdown = zsdp->zsd_shutdown;
	t->zsd_destroy = zsdp->zsd_destroy;
	list_insert_tail(&zone->zone_zsd, t);
	mutex_exit(&zsd_key_lock);
	mutex_exit(&zone->zone_lock);
	return (0);
}
|
662 |
||
663 |
/* |
|
664 |
* ZSD counterpart of pthread_getspecific(). |
|
665 |
*/ |
|
666 |
void * |
|
667 |
zone_getspecific(zone_key_t key, zone_t *zone) |
|
668 |
{ |
|
669 |
struct zsd_entry *t; |
|
670 |
void *data; |
|
671 |
||
672 |
mutex_enter(&zone->zone_lock); |
|
673 |
t = zsd_find(&zone->zone_zsd, key); |
|
674 |
data = (t == NULL ? NULL : t->zsd_data); |
|
675 |
mutex_exit(&zone->zone_lock); |
|
676 |
return (data); |
|
677 |
} |
|
678 |
||
679 |
/*
 * Function used to initialize a zone's list of ZSD callbacks and data
 * when the zone is being created.  The callbacks are initialized from
 * the template list (zsd_registered_keys), and the constructor
 * callback executed (if one exists).
 *
 * This is called before the zone is made publicly available, hence no
 * need to grab zone_lock.
 *
 * Although we grab and release zsd_key_lock, new entries cannot be
 * added to or removed from the zsd_registered_keys list until we
 * release zonehash_lock, so there isn't a window for a
 * zone_key_create() to come in after we've dropped zsd_key_lock but
 * before the zone is added to the zone list, such that the constructor
 * callbacks aren't executed for the new zone.
 */
static void
zone_zsd_configure(zone_t *zone)
{
	struct zsd_entry *zsdp;
	struct zsd_entry *t;
	zoneid_t zoneid = zone->zone_id;

	ASSERT(MUTEX_HELD(&zonehash_lock));
	/* The zone must not have any ZSD entries yet. */
	ASSERT(list_head(&zone->zone_zsd) == NULL);
	mutex_enter(&zsd_key_lock);
	for (zsdp = list_head(&zsd_registered_keys); zsdp != NULL;
	    zsdp = list_next(&zsd_registered_keys, zsdp)) {
		/*
		 * Only keys registered with a constructor get a per-zone
		 * entry here; see the ZSD block comment above.
		 */
		if (zsdp->zsd_create != NULL) {
			t = kmem_alloc(sizeof (*t), KM_SLEEP);
			t->zsd_key = zsdp->zsd_key;
			t->zsd_create = zsdp->zsd_create;
			t->zsd_data = (*t->zsd_create)(zoneid);
			t->zsd_shutdown = zsdp->zsd_shutdown;
			t->zsd_destroy = zsdp->zsd_destroy;
			list_insert_tail(&zone->zone_zsd, t);
		}
	}
	mutex_exit(&zsd_key_lock);
}
|
719 |
||
720 |
/* Selects which class of ZSD callback zone_zsd_callbacks() should invoke. */
enum zsd_callback_type { ZSD_CREATE, ZSD_SHUTDOWN, ZSD_DESTROY };
|
721 |
||
722 |
/* |
|
723 |
* Helper function to execute shutdown or destructor callbacks. |
|
724 |
*/ |
|
725 |
static void |
|
726 |
zone_zsd_callbacks(zone_t *zone, enum zsd_callback_type ct) |
|
727 |
{ |
|
728 |
struct zsd_entry *zsdp; |
|
729 |
struct zsd_entry *t; |
|
730 |
zoneid_t zoneid = zone->zone_id; |
|
731 |
||
732 |
ASSERT(ct == ZSD_SHUTDOWN || ct == ZSD_DESTROY); |
|
733 |
ASSERT(ct != ZSD_SHUTDOWN || zone_status_get(zone) >= ZONE_IS_EMPTY); |
|
734 |
ASSERT(ct != ZSD_DESTROY || zone_status_get(zone) >= ZONE_IS_DOWN); |
|
735 |
||
736 |
mutex_enter(&zone->zone_lock); |
|
737 |
if (ct == ZSD_DESTROY) { |
|
738 |
if (zone->zone_flags & ZF_DESTROYED) { |
|
739 |
/* |
|
740 |
* Make sure destructors are only called once. |
|
741 |
*/ |
|
742 |
mutex_exit(&zone->zone_lock); |
|
743 |
return; |
|
744 |
} |
|
745 |
zone->zone_flags |= ZF_DESTROYED; |
|
746 |
} |
|
747 |
mutex_exit(&zone->zone_lock); |
|
748 |
||
749 |
/* |
|
750 |
* Both zsd_key_lock and zone_lock need to be held in order to add or |
|
751 |
* remove a ZSD key, (either globally as part of |
|
752 |
* zone_key_create()/zone_key_delete(), or on a per-zone basis, as is |
|
753 |
* possible through zone_setspecific()), so it's sufficient to hold |
|
754 |
* zsd_key_lock here. |
|
755 |
* |
|
756 |
* This is a good thing, since we don't want to recursively try to grab |
|
757 |
* zone_lock if a callback attempts to do something like a crfree() or |
|
758 |
* zone_rele(). |
|
759 |
*/ |
|
760 |
mutex_enter(&zsd_key_lock); |
|
761 |
for (zsdp = list_head(&zsd_registered_keys); zsdp != NULL; |
|
762 |
zsdp = list_next(&zsd_registered_keys, zsdp)) { |
|
763 |
zone_key_t key = zsdp->zsd_key; |
|
764 |
||
765 |
/* Skip if no callbacks registered */ |
|
766 |
if (ct == ZSD_SHUTDOWN && zsdp->zsd_shutdown == NULL) |
|
767 |
continue; |
|
768 |
if (ct == ZSD_DESTROY && zsdp->zsd_destroy == NULL) |
|
769 |
continue; |
|
770 |
/* |
|
771 |
* Call the callback with the zone-specific data if we can find |
|
772 |
* any, otherwise with NULL. |
|
773 |
*/ |
|
774 |
t = zsd_find(&zone->zone_zsd, key); |
|
775 |
if (t != NULL) { |
|
776 |
if (ct == ZSD_SHUTDOWN) { |
|
777 |
t->zsd_shutdown(zoneid, t->zsd_data); |
|
778 |
} else { |
|
779 |
ASSERT(ct == ZSD_DESTROY); |
|
780 |
t->zsd_destroy(zoneid, t->zsd_data); |
|
781 |
} |
|
782 |
} else { |
|
783 |
if (ct == ZSD_SHUTDOWN) { |
|
784 |
zsdp->zsd_shutdown(zoneid, NULL); |
|
785 |
} else { |
|
786 |
ASSERT(ct == ZSD_DESTROY); |
|
787 |
zsdp->zsd_destroy(zoneid, NULL); |
|
788 |
} |
|
789 |
} |
|
790 |
} |
|
791 |
mutex_exit(&zsd_key_lock); |
|
792 |
} |
|
793 |
||
794 |
/* |
|
795 |
* Called when the zone is going away; free ZSD-related memory, and |
|
796 |
* destroy the zone_zsd list. |
|
797 |
*/ |
|
798 |
static void |
|
799 |
zone_free_zsd(zone_t *zone) |
|
800 |
{ |
|
801 |
struct zsd_entry *t, *next; |
|
802 |
||
803 |
/* |
|
804 |
* Free all the zsd_entry's we had on this zone. |
|
805 |
*/ |
|
806 |
for (t = list_head(&zone->zone_zsd); t != NULL; t = next) { |
|
807 |
next = list_next(&zone->zone_zsd, t); |
|
808 |
list_remove(&zone->zone_zsd, t); |
|
809 |
kmem_free(t, sizeof (*t)); |
|
810 |
} |
|
811 |
list_destroy(&zone->zone_zsd); |
|
812 |
} |
|
813 |
||
814 |
/* |
|
789 | 815 |
* Frees memory associated with the zone dataset list. |
816 |
*/ |
|
817 |
static void |
|
818 |
zone_free_datasets(zone_t *zone) |
|
819 |
{ |
|
820 |
zone_dataset_t *t, *next; |
|
821 |
||
822 |
for (t = list_head(&zone->zone_datasets); t != NULL; t = next) { |
|
823 |
next = list_next(&zone->zone_datasets, t); |
|
824 |
list_remove(&zone->zone_datasets, t); |
|
825 |
kmem_free(t->zd_dataset, strlen(t->zd_dataset) + 1); |
|
826 |
kmem_free(t, sizeof (*t)); |
|
827 |
} |
|
828 |
list_destroy(&zone->zone_datasets); |
|
829 |
} |
|
830 |
||
831 |
/* |
|
0 | 832 |
* zone.cpu-shares resource control support. |
833 |
*/ |
|
834 |
/*ARGSUSED*/ |
|
835 |
static rctl_qty_t |
|
836 |
zone_cpu_shares_usage(rctl_t *rctl, struct proc *p) |
|
837 |
{ |
|
838 |
ASSERT(MUTEX_HELD(&p->p_lock)); |
|
839 |
return (p->p_zone->zone_shares); |
|
840 |
} |
|
841 |
||
842 |
/*ARGSUSED*/ |
|
843 |
static int |
|
844 |
zone_cpu_shares_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, |
|
845 |
rctl_qty_t nv) |
|
846 |
{ |
|
847 |
ASSERT(MUTEX_HELD(&p->p_lock)); |
|
848 |
ASSERT(e->rcep_t == RCENTITY_ZONE); |
|
849 |
if (e->rcep_p.zone == NULL) |
|
850 |
return (0); |
|
851 |
||
852 |
e->rcep_p.zone->zone_shares = nv; |
|
853 |
return (0); |
|
854 |
} |
|
855 |
||
856 |
static rctl_ops_t zone_cpu_shares_ops = { |
|
857 |
rcop_no_action, |
|
858 |
zone_cpu_shares_usage, |
|
859 |
zone_cpu_shares_set, |
|
860 |
rcop_no_test |
|
861 |
}; |
|
862 |
||
863 |
/*ARGSUSED*/ |
|
864 |
static rctl_qty_t |
|
865 |
zone_lwps_usage(rctl_t *r, proc_t *p) |
|
866 |
{ |
|
867 |
rctl_qty_t nlwps; |
|
868 |
zone_t *zone = p->p_zone; |
|
869 |
||
870 |
ASSERT(MUTEX_HELD(&p->p_lock)); |
|
871 |
||
872 |
mutex_enter(&zone->zone_nlwps_lock); |
|
873 |
nlwps = zone->zone_nlwps; |
|
874 |
mutex_exit(&zone->zone_nlwps_lock); |
|
875 |
||
876 |
return (nlwps); |
|
877 |
} |
|
878 |
||
879 |
/*ARGSUSED*/ |
|
880 |
static int |
|
881 |
zone_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl, |
|
882 |
rctl_qty_t incr, uint_t flags) |
|
883 |
{ |
|
884 |
rctl_qty_t nlwps; |
|
885 |
||
886 |
ASSERT(MUTEX_HELD(&p->p_lock)); |
|
887 |
ASSERT(e->rcep_t == RCENTITY_ZONE); |
|
888 |
if (e->rcep_p.zone == NULL) |
|
889 |
return (0); |
|
890 |
ASSERT(MUTEX_HELD(&(e->rcep_p.zone->zone_nlwps_lock))); |
|
891 |
nlwps = e->rcep_p.zone->zone_nlwps; |
|
892 |
||
893 |
if (nlwps + incr > rcntl->rcv_value) |
|
894 |
return (1); |
|
895 |
||
896 |
return (0); |
|
897 |
} |
|
898 |
||
899 |
/*ARGSUSED*/ |
|
900 |
static int |
|
901 |
zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv) { |
|
902 |
||
903 |
ASSERT(MUTEX_HELD(&p->p_lock)); |
|
904 |
ASSERT(e->rcep_t == RCENTITY_ZONE); |
|
905 |
if (e->rcep_p.zone == NULL) |
|
906 |
return (0); |
|
907 |
e->rcep_p.zone->zone_nlwps_ctl = nv; |
|
908 |
return (0); |
|
909 |
} |
|
910 |
||
911 |
static rctl_ops_t zone_lwps_ops = { |
|
912 |
rcop_no_action, |
|
913 |
zone_lwps_usage, |
|
914 |
zone_lwps_set, |
|
915 |
zone_lwps_test, |
|
916 |
}; |
|
917 |
||
918 |
/* |
|
919 |
* Helper function to brand the zone with a unique ID. |
|
920 |
*/ |
|
921 |
static void |
|
922 |
zone_uniqid(zone_t *zone) |
|
923 |
{ |
|
924 |
static uint64_t uniqid = 0; |
|
925 |
||
926 |
ASSERT(MUTEX_HELD(&zonehash_lock)); |
|
927 |
zone->zone_uniqid = uniqid++; |
|
928 |
} |
|
929 |
||
930 |
/* |
|
931 |
* Returns a held pointer to the "kcred" for the specified zone. |
|
932 |
*/ |
|
933 |
struct cred * |
|
934 |
zone_get_kcred(zoneid_t zoneid) |
|
935 |
{ |
|
936 |
zone_t *zone; |
|
937 |
cred_t *cr; |
|
938 |
||
939 |
if ((zone = zone_find_by_id(zoneid)) == NULL) |
|
940 |
return (NULL); |
|
941 |
cr = zone->zone_kcred; |
|
942 |
crhold(cr); |
|
943 |
zone_rele(zone); |
|
944 |
return (cr); |
|
945 |
} |
|
946 |
||
947 |
/* |
|
948 |
* Called very early on in boot to initialize the ZSD list so that |
|
949 |
* zone_key_create() can be called before zone_init(). It also initializes |
|
950 |
* portions of zone0 which may be used before zone_init() is called. The |
|
951 |
* variable "global_zone" will be set when zone0 is fully initialized by |
|
952 |
* zone_init(). |
|
953 |
*/ |
|
954 |
void |
|
955 |
zone_zsd_init(void) |
|
956 |
{ |
|
957 |
mutex_init(&zonehash_lock, NULL, MUTEX_DEFAULT, NULL); |
|
958 |
mutex_init(&zsd_key_lock, NULL, MUTEX_DEFAULT, NULL); |
|
959 |
list_create(&zsd_registered_keys, sizeof (struct zsd_entry), |
|
960 |
offsetof(struct zsd_entry, zsd_linkage)); |
|
961 |
list_create(&zone_active, sizeof (zone_t), |
|
962 |
offsetof(zone_t, zone_linkage)); |
|
963 |
list_create(&zone_deathrow, sizeof (zone_t), |
|
964 |
offsetof(zone_t, zone_linkage)); |
|
965 |
||
966 |
mutex_init(&zone0.zone_lock, NULL, MUTEX_DEFAULT, NULL); |
|
967 |
mutex_init(&zone0.zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL); |
|
968 |
zone0.zone_shares = 1; |
|
969 |
zone0.zone_nlwps_ctl = INT_MAX; |
|
970 |
zone0.zone_name = GLOBAL_ZONENAME; |
|
971 |
zone0.zone_nodename = utsname.nodename; |
|
972 |
zone0.zone_domain = srpc_domain; |
|
973 |
zone0.zone_ref = 1; |
|
974 |
zone0.zone_id = GLOBAL_ZONEID; |
|
975 |
zone0.zone_status = ZONE_IS_RUNNING; |
|
976 |
zone0.zone_rootpath = "/"; |
|
977 |
zone0.zone_rootpathlen = 2; |
|
978 |
zone0.zone_psetid = ZONE_PS_INVAL; |
|
979 |
zone0.zone_ncpus = 0; |
|
980 |
zone0.zone_ncpus_online = 0; |
|
981 |
zone0.zone_proc_initpid = 1; |
|
2267 | 982 |
zone0.zone_initname = initname; |
0 | 983 |
list_create(&zone0.zone_zsd, sizeof (struct zsd_entry), |
984 |
offsetof(struct zsd_entry, zsd_linkage)); |
|
985 |
list_insert_head(&zone_active, &zone0); |
|
986 |
||
987 |
/* |
|
988 |
* The root filesystem is not mounted yet, so zone_rootvp cannot be set |
|
989 |
* to anything meaningful. It is assigned to be 'rootdir' in |
|
990 |
* vfs_mountroot(). |
|
991 |
*/ |
|
992 |
zone0.zone_rootvp = NULL; |
|
993 |
zone0.zone_vfslist = NULL; |
|
2267 | 994 |
zone0.zone_bootargs = initargs; |
0 | 995 |
zone0.zone_privset = kmem_alloc(sizeof (priv_set_t), KM_SLEEP); |
996 |
/* |
|
997 |
* The global zone has all privileges |
|
998 |
*/ |
|
999 |
priv_fillset(zone0.zone_privset); |
|
1000 |
/* |
|
1001 |
* Add p0 to the global zone |
|
1002 |
*/ |
|
1003 |
zone0.zone_zsched = &p0; |
|
1004 |
p0.p_zone = &zone0; |
|
1005 |
} |
|
1006 |
||
1007 |
/* |
|
1676 | 1008 |
* Compute a hash value based on the contents of the label and the DOI. The |
1009 |
* hash algorithm is somewhat arbitrary, but is based on the observation that |
|
1010 |
* humans will likely pick labels that differ by amounts that work out to be |
|
1011 |
* multiples of the number of hash chains, and thus stirring in some primes |
|
1012 |
* should help. |
|
1013 |
*/ |
|
1014 |
static uint_t |
|
1015 |
hash_bylabel(void *hdata, mod_hash_key_t key) |
|
1016 |
{ |
|
1017 |
const ts_label_t *lab = (ts_label_t *)key; |
|
1018 |
const uint32_t *up, *ue; |
|
1019 |
uint_t hash; |
|
1020 |
int i; |
|
1021 |
||
1022 |
_NOTE(ARGUNUSED(hdata)); |
|
1023 |
||
1024 |
hash = lab->tsl_doi + (lab->tsl_doi << 1); |
|
1025 |
/* we depend on alignment of label, but not representation */ |
|
1026 |
up = (const uint32_t *)&lab->tsl_label; |
|
1027 |
ue = up + sizeof (lab->tsl_label) / sizeof (*up); |
|
1028 |
i = 1; |
|
1029 |
while (up < ue) { |
|
1030 |
/* using 2^n + 1, 1 <= n <= 16 as source of many primes */ |
|
1031 |
hash += *up + (*up << ((i % 16) + 1)); |
|
1032 |
up++; |
|
1033 |
i++; |
|
1034 |
} |
|
1035 |
return (hash); |
|
1036 |
} |
|
1037 |
||
1038 |
/* |
|
1039 |
* All that mod_hash cares about here is zero (equal) versus non-zero (not |
|
1040 |
* equal). This may need to be changed if less than / greater than is ever |
|
1041 |
* needed. |
|
1042 |
*/ |
|
1043 |
static int |
|
1044 |
hash_labelkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) |
|
1045 |
{ |
|
1046 |
ts_label_t *lab1 = (ts_label_t *)key1; |
|
1047 |
ts_label_t *lab2 = (ts_label_t *)key2; |
|
1048 |
||
1049 |
return (label_equal(lab1, lab2) ? 0 : 1); |
|
1050 |
} |
|
1051 |
||
1052 |
/* |
|
0 | 1053 |
* Called by main() to initialize the zones framework. |
1054 |
*/ |
|
1055 |
void |
|
1056 |
zone_init(void) |
|
1057 |
{ |
|
1058 |
rctl_dict_entry_t *rde; |
|
1059 |
rctl_val_t *dval; |
|
1060 |
rctl_set_t *set; |
|
1061 |
rctl_alloc_gp_t *gp; |
|
1062 |
rctl_entity_p_t e; |
|
1166 | 1063 |
int res; |
0 | 1064 |
|
1065 |
ASSERT(curproc == &p0); |
|
1066 |
||
1067 |
/* |
|
1068 |
* Create ID space for zone IDs. ID 0 is reserved for the |
|
1069 |
* global zone. |
|
1070 |
*/ |
|
1071 |
zoneid_space = id_space_create("zoneid_space", 1, MAX_ZONEID); |
|
1072 |
||
1073 |
/* |
|
1074 |
* Initialize generic zone resource controls, if any. |
|
1075 |
*/ |
|
1076 |
rc_zone_cpu_shares = rctl_register("zone.cpu-shares", |
|
1077 |
RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER | |
|
1996
1bd5128dcd61
6294710 rctladm incorrectly claims and reports it can log to syslog for project.cpu-shares
ml93401
parents:
1876
diff
changeset
|
1078 |
RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER, |
1bd5128dcd61
6294710 rctladm incorrectly claims and reports it can log to syslog for project.cpu-shares
ml93401
parents:
1876
diff
changeset
|
1079 |
FSS_MAXSHARES, FSS_MAXSHARES, |
0 | 1080 |
&zone_cpu_shares_ops); |
1081 |
||
1082 |
rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE, |
|
1083 |
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT, |
|
1084 |
INT_MAX, INT_MAX, &zone_lwps_ops); |
|
1085 |
/* |
|
1086 |
* Create a rctl_val with PRIVILEGED, NOACTION, value = 1. Then attach |
|
1087 |
* this at the head of the rctl_dict_entry for ``zone.cpu-shares''. |
|
1088 |
*/ |
|
1089 |
dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); |
|
1090 |
bzero(dval, sizeof (rctl_val_t)); |
|
1091 |
dval->rcv_value = 1; |
|
1092 |
dval->rcv_privilege = RCPRIV_PRIVILEGED; |
|
1093 |
dval->rcv_flagaction = RCTL_LOCAL_NOACTION; |
|
1094 |
dval->rcv_action_recip_pid = -1; |
|
1095 |
||
1096 |
rde = rctl_dict_lookup("zone.cpu-shares"); |
|
1097 |
(void) rctl_val_list_insert(&rde->rcd_default_value, dval); |
|
1098 |
||
1099 |
/* |
|
1100 |
* Initialize the ``global zone''. |
|
1101 |
*/ |
|
1102 |
set = rctl_set_create(); |
|
1103 |
gp = rctl_set_init_prealloc(RCENTITY_ZONE); |
|
1104 |
mutex_enter(&p0.p_lock); |
|
1105 |
e.rcep_p.zone = &zone0; |
|
1106 |
e.rcep_t = RCENTITY_ZONE; |
|
1107 |
zone0.zone_rctls = rctl_set_init(RCENTITY_ZONE, &p0, &e, set, |
|
1108 |
gp); |
|
1109 |
||
1110 |
zone0.zone_nlwps = p0.p_lwpcnt; |
|
1111 |
zone0.zone_ntasks = 1; |
|
1112 |
mutex_exit(&p0.p_lock); |
|
1113 |
rctl_prealloc_destroy(gp); |
|
1114 |
/* |
|
1115 |
* pool_default hasn't been initialized yet, so we let pool_init() take |
|
1116 |
* care of making the global zone is in the default pool. |
|
1117 |
*/ |
|
1676 | 1118 |
|
1119 |
/* |
|
1120 |
* Initialize zone label. |
|
1121 |
* mlp are initialized when tnzonecfg is loaded. |
|
1122 |
*/ |
|
1123 |
zone0.zone_slabel = l_admin_low; |
|
1124 |
rw_init(&zone0.zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL); |
|
1125 |
label_hold(l_admin_low); |
|
1126 |
||
0 | 1127 |
mutex_enter(&zonehash_lock); |
1128 |
zone_uniqid(&zone0); |
|
1129 |
ASSERT(zone0.zone_uniqid == GLOBAL_ZONEUNIQID); |
|
1676 | 1130 |
|
0 | 1131 |
zonehashbyid = mod_hash_create_idhash("zone_by_id", zone_hash_size, |
1132 |
mod_hash_null_valdtor); |
|
1133 |
zonehashbyname = mod_hash_create_strhash("zone_by_name", |
|
1134 |
zone_hash_size, mod_hash_null_valdtor); |
|
1676 | 1135 |
/* |
1136 |
* maintain zonehashbylabel only for labeled systems |
|
1137 |
*/ |
|
1138 |
if (is_system_labeled()) |
|
1139 |
zonehashbylabel = mod_hash_create_extended("zone_by_label", |
|
1140 |
zone_hash_size, mod_hash_null_keydtor, |
|
1141 |
mod_hash_null_valdtor, hash_bylabel, NULL, |
|
1142 |
hash_labelkey_cmp, KM_SLEEP); |
|
0 | 1143 |
zonecount = 1; |
1144 |
||
1145 |
(void) mod_hash_insert(zonehashbyid, (mod_hash_key_t)GLOBAL_ZONEID, |
|
1146 |
(mod_hash_val_t)&zone0); |
|
1147 |
(void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)zone0.zone_name, |
|
1148 |
(mod_hash_val_t)&zone0); |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
1149 |
if (is_system_labeled()) { |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
1150 |
zone0.zone_flags |= ZF_HASHED_LABEL; |
1676 | 1151 |
(void) mod_hash_insert(zonehashbylabel, |
1152 |
(mod_hash_key_t)zone0.zone_slabel, (mod_hash_val_t)&zone0); |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
1153 |
} |
1676 | 1154 |
mutex_exit(&zonehash_lock); |
1155 |
||
0 | 1156 |
/* |
1157 |
* We avoid setting zone_kcred until now, since kcred is initialized |
|
1158 |
* sometime after zone_zsd_init() and before zone_init(). |
|
1159 |
*/ |
|
1160 |
zone0.zone_kcred = kcred; |
|
1161 |
/* |
|
1162 |
* The global zone is fully initialized (except for zone_rootvp which |
|
1163 |
* will be set when the root filesystem is mounted). |
|
1164 |
*/ |
|
1165 |
global_zone = &zone0; |
|
1166 | 1166 |
|
1167 |
/* |
|
1168 |
* Setup an event channel to send zone status change notifications on |
|
1169 |
*/ |
|
1170 |
res = sysevent_evc_bind(ZONE_EVENT_CHANNEL, &zone_event_chan, |
|
1171 |
EVCH_CREAT); |
|
1172 |
||
1173 |
if (res) |
|
1174 |
panic("Sysevent_evc_bind failed during zone setup.\n"); |
|
0 | 1175 |
} |
1176 |
||
1177 |
static void |
|
1178 |
zone_free(zone_t *zone) |
|
1179 |
{ |
|
1180 |
ASSERT(zone != global_zone); |
|
1181 |
ASSERT(zone->zone_ntasks == 0); |
|
1182 |
ASSERT(zone->zone_nlwps == 0); |
|
1183 |
ASSERT(zone->zone_cred_ref == 0); |
|
1184 |
ASSERT(zone->zone_kcred == NULL); |
|
1185 |
ASSERT(zone_status_get(zone) == ZONE_IS_DEAD || |
|
1186 |
zone_status_get(zone) == ZONE_IS_UNINITIALIZED); |
|
1187 |
||
1188 |
/* remove from deathrow list */ |
|
1189 |
if (zone_status_get(zone) == ZONE_IS_DEAD) { |
|
1190 |
ASSERT(zone->zone_ref == 0); |
|
1191 |
mutex_enter(&zone_deathrow_lock); |
|
1192 |
list_remove(&zone_deathrow, zone); |
|
1193 |
mutex_exit(&zone_deathrow_lock); |
|
1194 |
} |
|
1195 |
||
1196 |
zone_free_zsd(zone); |
|
789 | 1197 |
zone_free_datasets(zone); |
0 | 1198 |
|
1199 |
if (zone->zone_rootvp != NULL) |
|
1200 |
VN_RELE(zone->zone_rootvp); |
|
1201 |
if (zone->zone_rootpath) |
|
1202 |
kmem_free(zone->zone_rootpath, zone->zone_rootpathlen); |
|
1203 |
if (zone->zone_name != NULL) |
|
1204 |
kmem_free(zone->zone_name, ZONENAME_MAX); |
|
1676 | 1205 |
if (zone->zone_slabel != NULL) |
1206 |
label_rele(zone->zone_slabel); |
|
0 | 1207 |
if (zone->zone_nodename != NULL) |
1208 |
kmem_free(zone->zone_nodename, _SYS_NMLN); |
|
1209 |
if (zone->zone_domain != NULL) |
|
1210 |
kmem_free(zone->zone_domain, _SYS_NMLN); |
|
1211 |
if (zone->zone_privset != NULL) |
|
1212 |
kmem_free(zone->zone_privset, sizeof (priv_set_t)); |
|
1213 |
if (zone->zone_rctls != NULL) |
|
1214 |
rctl_set_free(zone->zone_rctls); |
|
1215 |
if (zone->zone_bootargs != NULL) |
|
2267 | 1216 |
kmem_free(zone->zone_bootargs, strlen(zone->zone_bootargs) + 1); |
1217 |
if (zone->zone_initname != NULL) |
|
1218 |
kmem_free(zone->zone_initname, strlen(zone->zone_initname) + 1); |
|
0 | 1219 |
id_free(zoneid_space, zone->zone_id); |
1220 |
mutex_destroy(&zone->zone_lock); |
|
1221 |
cv_destroy(&zone->zone_cv); |
|
1676 | 1222 |
rw_destroy(&zone->zone_mlps.mlpl_rwlock); |
0 | 1223 |
kmem_free(zone, sizeof (zone_t)); |
1224 |
} |
|
1225 |
||
1226 |
/* |
|
1227 |
* See block comment at the top of this file for information about zone |
|
1228 |
* status values. |
|
1229 |
*/ |
|
1230 |
/* |
|
1231 |
* Convenience function for setting zone status. |
|
1232 |
*/ |
|
1233 |
static void |
|
1234 |
zone_status_set(zone_t *zone, zone_status_t status) |
|
1235 |
{ |
|
1166 | 1236 |
|
1237 |
nvlist_t *nvl = NULL; |
|
0 | 1238 |
ASSERT(MUTEX_HELD(&zone_status_lock)); |
1239 |
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE && |
|
1240 |
status >= zone_status_get(zone)); |
|
1166 | 1241 |
|
1242 |
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) || |
|
1243 |
nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) || |
|
1244 |
nvlist_add_string(nvl, ZONE_CB_NEWSTATE, |
|
2267 | 1245 |
zone_status_table[status]) || |
1166 | 1246 |
nvlist_add_string(nvl, ZONE_CB_OLDSTATE, |
2267 | 1247 |
zone_status_table[zone->zone_status]) || |
1166 | 1248 |
nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) || |
1249 |
nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) || |
|
1250 |
sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS, |
|
2267 | 1251 |
ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) { |
1166 | 1252 |
#ifdef DEBUG |
1253 |
(void) printf( |
|
1254 |
"Failed to allocate and send zone state change event.\n"); |
|
1255 |
#endif |
|
1256 |
} |
|
1257 |
nvlist_free(nvl); |
|
1258 |
||
0 | 1259 |
zone->zone_status = status; |
1166 | 1260 |
|
0 | 1261 |
cv_broadcast(&zone->zone_cv); |
1262 |
} |
|
1263 |
||
1264 |
/* |
|
1265 |
* Public function to retrieve the zone status. The zone status may |
|
1266 |
* change after it is retrieved. |
|
1267 |
*/ |
|
1268 |
zone_status_t |
|
1269 |
zone_status_get(zone_t *zone) |
|
1270 |
{ |
|
1271 |
return (zone->zone_status); |
|
1272 |
} |
|
1273 |
||
1274 |
static int |
|
1275 |
zone_set_bootargs(zone_t *zone, const char *zone_bootargs) |
|
1276 |
{ |
|
2267 | 1277 |
char *bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); |
1278 |
int err = 0; |
|
1279 |
||
1280 |
ASSERT(zone != global_zone); |
|
1281 |
if ((err = copyinstr(zone_bootargs, bootargs, BOOTARGS_MAX, NULL)) != 0) |
|
1282 |
goto done; /* EFAULT or ENAMETOOLONG */ |
|
1283 |
||
1284 |
if (zone->zone_bootargs != NULL) |
|
1285 |
kmem_free(zone->zone_bootargs, strlen(zone->zone_bootargs) + 1); |
|
1286 |
||
1287 |
zone->zone_bootargs = kmem_alloc(strlen(bootargs) + 1, KM_SLEEP); |
|
1288 |
(void) strcpy(zone->zone_bootargs, bootargs); |
|
1289 |
||
1290 |
done: |
|
1291 |
kmem_free(bootargs, BOOTARGS_MAX); |
|
1292 |
return (err); |
|
1293 |
} |
|
1294 |
||
1295 |
static int |
|
1296 |
zone_set_initname(zone_t *zone, const char *zone_initname) |
|
1297 |
{ |
|
1298 |
char initname[INITNAME_SZ]; |
|
0 | 1299 |
size_t len; |
2267 | 1300 |
int err = 0; |
1301 |
||
1302 |
ASSERT(zone != global_zone); |
|
1303 |
if ((err = copyinstr(zone_initname, initname, INITNAME_SZ, &len)) != 0) |
|
0 | 1304 |
return (err); /* EFAULT or ENAMETOOLONG */ |
2267 | 1305 |
|
1306 |
if (zone->zone_initname != NULL) |
|
1307 |
kmem_free(zone->zone_initname, strlen(zone->zone_initname) + 1); |
|
1308 |
||
1309 |
zone->zone_initname = kmem_alloc(strlen(initname) + 1, KM_SLEEP); |
|
1310 |
(void) strcpy(zone->zone_initname, initname); |
|
0 | 1311 |
return (0); |
1312 |
} |
|
1313 |
||
1314 |
/* |
|
1315 |
* Block indefinitely waiting for (zone_status >= status) |
|
1316 |
*/ |
|
1317 |
void |
|
1318 |
zone_status_wait(zone_t *zone, zone_status_t status) |
|
1319 |
{ |
|
1320 |
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE); |
|
1321 |
||
1322 |
mutex_enter(&zone_status_lock); |
|
1323 |
while (zone->zone_status < status) { |
|
1324 |
cv_wait(&zone->zone_cv, &zone_status_lock); |
|
1325 |
} |
|
1326 |
mutex_exit(&zone_status_lock); |
|
1327 |
} |
|
1328 |
||
1329 |
/* |
|
1330 |
* Private CPR-safe version of zone_status_wait(). |
|
1331 |
*/ |
|
1332 |
static void |
|
1333 |
zone_status_wait_cpr(zone_t *zone, zone_status_t status, char *str) |
|
1334 |
{ |
|
1335 |
callb_cpr_t cprinfo; |
|
1336 |
||
1337 |
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE); |
|
1338 |
||
1339 |
CALLB_CPR_INIT(&cprinfo, &zone_status_lock, callb_generic_cpr, |
|
1340 |
str); |
|
1341 |
mutex_enter(&zone_status_lock); |
|
1342 |
while (zone->zone_status < status) { |
|
1343 |
CALLB_CPR_SAFE_BEGIN(&cprinfo); |
|
1344 |
cv_wait(&zone->zone_cv, &zone_status_lock); |
|
1345 |
CALLB_CPR_SAFE_END(&cprinfo, &zone_status_lock); |
|
1346 |
} |
|
1347 |
/* |
|
1348 |
* zone_status_lock is implicitly released by the following. |
|
1349 |
*/ |
|
1350 |
CALLB_CPR_EXIT(&cprinfo); |
|
1351 |
} |
|
1352 |
||
1353 |
/* |
|
1354 |
* Block until zone enters requested state or signal is received. Return (0) |
|
1355 |
* if signaled, non-zero otherwise. |
|
1356 |
*/ |
|
1357 |
int |
|
1358 |
zone_status_wait_sig(zone_t *zone, zone_status_t status) |
|
1359 |
{ |
|
1360 |
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE); |
|
1361 |
||
1362 |
mutex_enter(&zone_status_lock); |
|
1363 |
while (zone->zone_status < status) { |
|
1364 |
if (!cv_wait_sig(&zone->zone_cv, &zone_status_lock)) { |
|
1365 |
mutex_exit(&zone_status_lock); |
|
1366 |
return (0); |
|
1367 |
} |
|
1368 |
} |
|
1369 |
mutex_exit(&zone_status_lock); |
|
1370 |
return (1); |
|
1371 |
} |
|
1372 |
||
1373 |
/* |
|
1374 |
* Block until the zone enters the requested state or the timeout expires, |
|
1375 |
* whichever happens first. Return (-1) if operation timed out, time remaining |
|
1376 |
* otherwise. |
|
1377 |
*/ |
|
1378 |
clock_t |
|
1379 |
zone_status_timedwait(zone_t *zone, clock_t tim, zone_status_t status) |
|
1380 |
{ |
|
1381 |
clock_t timeleft = 0; |
|
1382 |
||
1383 |
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE); |
|
1384 |
||
1385 |
mutex_enter(&zone_status_lock); |
|
1386 |
while (zone->zone_status < status && timeleft != -1) { |
|
1387 |
timeleft = cv_timedwait(&zone->zone_cv, &zone_status_lock, tim); |
|
1388 |
} |
|
1389 |
mutex_exit(&zone_status_lock); |
|
1390 |
return (timeleft); |
|
1391 |
} |
|
1392 |
||
1393 |
/* |
|
1394 |
* Block until the zone enters the requested state, the current process is |
|
1395 |
* signaled, or the timeout expires, whichever happens first. Return (-1) if |
|
1396 |
* operation timed out, 0 if signaled, time remaining otherwise. |
|
1397 |
*/ |
|
1398 |
clock_t |
|
1399 |
zone_status_timedwait_sig(zone_t *zone, clock_t tim, zone_status_t status) |
|
1400 |
{ |
|
1401 |
clock_t timeleft = tim - lbolt; |
|
1402 |
||
1403 |
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE); |
|
1404 |
||
1405 |
mutex_enter(&zone_status_lock); |
|
1406 |
while (zone->zone_status < status) { |
|
1407 |
timeleft = cv_timedwait_sig(&zone->zone_cv, &zone_status_lock, |
|
1408 |
tim); |
|
1409 |
if (timeleft <= 0) |
|
1410 |
break; |
|
1411 |
} |
|
1412 |
mutex_exit(&zone_status_lock); |
|
1413 |
return (timeleft); |
|
1414 |
} |
|
1415 |
||
1416 |
/*
 * Zones have two reference counts: one for references from credential
 * structures (zone_cred_ref), and one (zone_ref) for everything else.
 * This is so we can allow a zone to be rebooted while there are still
 * outstanding cred references, since certain drivers cache dblks (which
 * implicitly results in cached creds).  We wait for zone_ref to drop to
 * 0 (actually 1), but not zone_cred_ref.  The zone structure itself is
 * later freed when the zone_cred_ref drops to 0, though nothing other
 * than the zone id and privilege set should be accessed once the zone
 * is "dead".
 *
 * A debugging flag, zone_wait_for_cred, can be set to a non-zero value
 * to force halt/reboot to block waiting for the zone_cred_ref to drop
 * to 0.  This can be useful to flush out other sources of cached creds
 * that may be less innocuous than the driver case.
 */

int zone_wait_for_cred = 0;
|
1434 |
||
1435 |
static void |
|
1436 |
zone_hold_locked(zone_t *z) |
|
1437 |
{ |
|
1438 |
ASSERT(MUTEX_HELD(&z->zone_lock)); |
|
1439 |
z->zone_ref++; |
|
1440 |
ASSERT(z->zone_ref != 0); |
|
1441 |
} |
|
1442 |
||
1443 |
void |
|
1444 |
zone_hold(zone_t *z) |
|
1445 |
{ |
|
1446 |
mutex_enter(&z->zone_lock); |
|
1447 |
zone_hold_locked(z); |
|
1448 |
mutex_exit(&z->zone_lock); |
|
1449 |
} |
|
1450 |
||
1451 |
/* |
|
1452 |
* If the non-cred ref count drops to 1 and either the cred ref count |
|
1453 |
* is 0 or we aren't waiting for cred references, the zone is ready to |
|
1454 |
* be destroyed. |
|
1455 |
*/ |
|
1456 |
#define ZONE_IS_UNREF(zone) ((zone)->zone_ref == 1 && \ |
|
1457 |
(!zone_wait_for_cred || (zone)->zone_cred_ref == 0)) |
|
1458 |
||
1459 |
void |
|
1460 |
zone_rele(zone_t *z) |
|
1461 |
{ |
|
1462 |
boolean_t wakeup; |
|
1463 |
||
1464 |
mutex_enter(&z->zone_lock); |
|
1465 |
ASSERT(z->zone_ref != 0); |
|
1466 |
z->zone_ref--; |
|
1467 |
if (z->zone_ref == 0 && z->zone_cred_ref == 0) { |
|
1468 |
/* no more refs, free the structure */ |
|
1469 |
mutex_exit(&z->zone_lock); |
|
1470 |
zone_free(z); |
|
1471 |
return; |
|
1472 |
} |
|
1473 |
/* signal zone_destroy so the zone can finish halting */ |
|
1474 |
wakeup = (ZONE_IS_UNREF(z) && zone_status_get(z) >= ZONE_IS_DEAD); |
|
1475 |
mutex_exit(&z->zone_lock); |
|
1476 |
||
1477 |
if (wakeup) { |
|
1478 |
/* |
|
1479 |
* Grabbing zonehash_lock here effectively synchronizes with |
|
1480 |
* zone_destroy() to avoid missed signals. |
|
1481 |
*/ |
|
1482 |
mutex_enter(&zonehash_lock); |
|
1483 |
cv_broadcast(&zone_destroy_cv); |
|
1484 |
mutex_exit(&zonehash_lock); |
|
1485 |
} |
|
1486 |
} |
|
1487 |
||
1488 |
void |
|
1489 |
zone_cred_hold(zone_t *z) |
|
1490 |
{ |
|
1491 |
mutex_enter(&z->zone_lock); |
|
1492 |
z->zone_cred_ref++; |
|
1493 |
ASSERT(z->zone_cred_ref != 0); |
|
1494 |
mutex_exit(&z->zone_lock); |
|
1495 |
} |
|
1496 |
||
1497 |
void |
|
1498 |
zone_cred_rele(zone_t *z) |
|
1499 |
{ |
|
1500 |
boolean_t wakeup; |
|
1501 |
||
1502 |
mutex_enter(&z->zone_lock); |
|
1503 |
ASSERT(z->zone_cred_ref != 0); |
|
1504 |
z->zone_cred_ref--; |
|
1505 |
if (z->zone_ref == 0 && z->zone_cred_ref == 0) { |
|
1506 |
/* no more refs, free the structure */ |
|
1507 |
mutex_exit(&z->zone_lock); |
|
1508 |
zone_free(z); |
|
1509 |
return; |
|
1510 |
} |
|
1511 |
/* |
|
1512 |
* If zone_destroy is waiting for the cred references to drain |
|
1513 |
* out, and they have, signal it. |
|
1514 |
*/ |
|
1515 |
wakeup = (zone_wait_for_cred && ZONE_IS_UNREF(z) && |
|
1516 |
zone_status_get(z) >= ZONE_IS_DEAD); |
|
1517 |
mutex_exit(&z->zone_lock); |
|
1518 |
||
1519 |
if (wakeup) { |
|
1520 |
/* |
|
1521 |
* Grabbing zonehash_lock here effectively synchronizes with |
|
1522 |
* zone_destroy() to avoid missed signals. |
|
1523 |
*/ |
|
1524 |
mutex_enter(&zonehash_lock); |
|
1525 |
cv_broadcast(&zone_destroy_cv); |
|
1526 |
mutex_exit(&zonehash_lock); |
|
1527 |
} |
|
1528 |
} |
|
1529 |
||
1530 |
void |
|
1531 |
zone_task_hold(zone_t *z) |
|
1532 |
{ |
|
1533 |
mutex_enter(&z->zone_lock); |
|
1534 |
z->zone_ntasks++; |
|
1535 |
ASSERT(z->zone_ntasks != 0); |
|
1536 |
mutex_exit(&z->zone_lock); |
|
1537 |
} |
|
1538 |
||
1539 |
void |
|
1540 |
zone_task_rele(zone_t *zone) |
|
1541 |
{ |
|
1542 |
uint_t refcnt; |
|
1543 |
||
1544 |
mutex_enter(&zone->zone_lock); |
|
1545 |
ASSERT(zone->zone_ntasks != 0); |
|
1546 |
refcnt = --zone->zone_ntasks; |
|
1547 |
if (refcnt > 1) { /* Common case */ |
|
1548 |
mutex_exit(&zone->zone_lock); |
|
1549 |
return; |
|
1550 |
} |
|
1551 |
zone_hold_locked(zone); /* so we can use the zone_t later */ |
|
1552 |
mutex_exit(&zone->zone_lock); |
|
1553 |
if (refcnt == 1) { |
|
1554 |
/* |
|
1555 |
* See if the zone is shutting down. |
|
1556 |
*/ |
|
1557 |
mutex_enter(&zone_status_lock); |
|
1558 |
if (zone_status_get(zone) != ZONE_IS_SHUTTING_DOWN) { |
|
1559 |
goto out; |
|
1560 |
} |
|
1561 |
||
1562 |
/* |
|
1563 |
* Make sure the ntasks didn't change since we |
|
1564 |
* dropped zone_lock. |
|
1565 |
*/ |
|
1566 |
mutex_enter(&zone->zone_lock); |
|
1567 |
if (refcnt != zone->zone_ntasks) { |
|
1568 |
mutex_exit(&zone->zone_lock); |
|
1569 |
goto out; |
|
1570 |
} |
|
1571 |
mutex_exit(&zone->zone_lock); |
|
1572 |
||
1573 |
/* |
|
1574 |
* No more user processes in the zone. The zone is empty. |
|
1575 |
*/ |
|
1576 |
zone_status_set(zone, ZONE_IS_EMPTY); |
|
1577 |
goto out; |
|
1578 |
} |
|
1579 |
||
1580 |
ASSERT(refcnt == 0); |
|
1581 |
/* |
|
1582 |
* zsched has exited; the zone is dead. |
|
1583 |
*/ |
|
1584 |
zone->zone_zsched = NULL; /* paranoia */ |
|
1585 |
mutex_enter(&zone_status_lock); |
|
1586 |
zone_status_set(zone, ZONE_IS_DEAD); |
|
1587 |
out: |
|
1588 |
mutex_exit(&zone_status_lock); |
|
1589 |
zone_rele(zone); |
|
1590 |
} |
|
1591 |
||
1592 |
zoneid_t |
|
1593 |
getzoneid(void) |
|
1594 |
{ |
|
1595 |
return (curproc->p_zone->zone_id); |
|
1596 |
} |
|
1597 |
||
1598 |
/* |
|
1599 |
* Internal versions of zone_find_by_*(). These don't zone_hold() or |
|
1600 |
* check the validity of a zone's state. |
|
1601 |
*/ |
|
1602 |
static zone_t * |
|
1603 |
zone_find_all_by_id(zoneid_t zoneid) |
|
1604 |
{ |
|
1605 |
mod_hash_val_t hv; |
|
1606 |
zone_t *zone = NULL; |
|
1607 |
||
1608 |
ASSERT(MUTEX_HELD(&zonehash_lock)); |
|
1609 |
||
1610 |
if (mod_hash_find(zonehashbyid, |
|
1611 |
(mod_hash_key_t)(uintptr_t)zoneid, &hv) == 0) |
|
1612 |
zone = (zone_t *)hv; |
|
1613 |
return (zone); |
|
1614 |
} |
|
1615 |
||
1616 |
static zone_t * |
|
1676 | 1617 |
zone_find_all_by_label(const ts_label_t *label) |
1618 |
{ |
|
1619 |
mod_hash_val_t hv; |
|
1620 |
zone_t *zone = NULL; |
|
1621 |
||
1622 |
ASSERT(MUTEX_HELD(&zonehash_lock)); |
|
1623 |
||
1624 |
/* |
|
1625 |
* zonehashbylabel is not maintained for unlabeled systems |
|
1626 |
*/ |
|
1627 |
if (!is_system_labeled()) |
|
1628 |
return (NULL); |
|
1629 |
if (mod_hash_find(zonehashbylabel, (mod_hash_key_t)label, &hv) == 0) |
|
1630 |
zone = (zone_t *)hv; |
|
1631 |
return (zone); |
|
1632 |
} |
|
1633 |
||
1634 |
static zone_t * |
|
0 | 1635 |
zone_find_all_by_name(char *name) |
1636 |
{ |
|
1637 |
mod_hash_val_t hv; |
|
1638 |
zone_t *zone = NULL; |
|
1639 |
||
1640 |
ASSERT(MUTEX_HELD(&zonehash_lock)); |
|
1641 |
||
1642 |
if (mod_hash_find(zonehashbyname, (mod_hash_key_t)name, &hv) == 0) |
|
1643 |
zone = (zone_t *)hv; |
|
1644 |
return (zone); |
|
1645 |
} |
|
1646 |
||
1647 |
/* |
|
1648 |
* Public interface for looking up a zone by zoneid. Only returns the zone if |
|
1649 |
* it is fully initialized, and has not yet begun the zone_destroy() sequence. |
|
1650 |
* Caller must call zone_rele() once it is done with the zone. |
|
1651 |
* |
|
1652 |
* The zone may begin the zone_destroy() sequence immediately after this |
|
1653 |
* function returns, but may be safely used until zone_rele() is called. |
|
1654 |
*/ |
|
1655 |
zone_t * |
|
1656 |
zone_find_by_id(zoneid_t zoneid) |
|
1657 |
{ |
|
1658 |
zone_t *zone; |
|
1659 |
zone_status_t status; |
|
1660 |
||
1661 |
mutex_enter(&zonehash_lock); |
|
1662 |
if ((zone = zone_find_all_by_id(zoneid)) == NULL) { |
|
1663 |
mutex_exit(&zonehash_lock); |
|
1664 |
return (NULL); |
|
1665 |
} |
|
1666 |
status = zone_status_get(zone); |
|
1667 |
if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) { |
|
1668 |
/* |
|
1669 |
* For all practical purposes the zone doesn't exist. |
|
1670 |
*/ |
|
1671 |
mutex_exit(&zonehash_lock); |
|
1672 |
return (NULL); |
|
1673 |
} |
|
1674 |
zone_hold(zone); |
|
1675 |
mutex_exit(&zonehash_lock); |
|
1676 |
return (zone); |
|
1677 |
} |
|
1678 |
||
1679 |
/*
 * Similar to zone_find_by_id, but using zone label as the key.
 *
 * NOTE(review): unlike zone_find_by_id(), there is no lower bound on the
 * zone's state here -- only "status > ZONE_IS_DOWN" rejects it -- so a
 * zone that is not yet ZONE_IS_READY can be returned.  This asymmetry
 * appears intentional for labeled-system lookups; confirm against callers.
 */
zone_t *
zone_find_by_label(const ts_label_t *label)
{
	zone_t *zone;
	zone_status_t status;

	mutex_enter(&zonehash_lock);
	if ((zone = zone_find_all_by_label(label)) == NULL) {
		mutex_exit(&zonehash_lock);
		return (NULL);
	}

	status = zone_status_get(zone);
	if (status > ZONE_IS_DOWN) {
		/*
		 * For all practical purposes the zone doesn't exist.
		 */
		mutex_exit(&zonehash_lock);
		return (NULL);
	}
	zone_hold(zone);	/* caller must zone_rele() when done */
	mutex_exit(&zonehash_lock);
	return (zone);
}
|
1706 |
||
1707 |
/* |
|
0 | 1708 |
* Similar to zone_find_by_id, but using zone name as the key. |
1709 |
*/ |
|
1710 |
zone_t * |
|
1711 |
zone_find_by_name(char *name) |
|
1712 |
{ |
|
1713 |
zone_t *zone; |
|
1714 |
zone_status_t status; |
|
1715 |
||
1716 |
mutex_enter(&zonehash_lock); |
|
1717 |
if ((zone = zone_find_all_by_name(name)) == NULL) { |
|
1718 |
mutex_exit(&zonehash_lock); |
|
1719 |
return (NULL); |
|
1720 |
} |
|
1721 |
status = zone_status_get(zone); |
|
1722 |
if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) { |
|
1723 |
/* |
|
1724 |
* For all practical purposes the zone doesn't exist. |
|
1725 |
*/ |
|
1726 |
mutex_exit(&zonehash_lock); |
|
1727 |
return (NULL); |
|
1728 |
} |
|
1729 |
zone_hold(zone); |
|
1730 |
mutex_exit(&zonehash_lock); |
|
1731 |
return (zone); |
|
1732 |
} |
|
1733 |
||
1734 |
/*
 * Similar to zone_find_by_id(), using the path as a key.  For instance,
 * if there is a zone "foo" rooted at /foo/root, and the path argument
 * is "/foo/root/proc", it will return the held zone_t corresponding to
 * zone "foo".
 *
 * zone_find_by_path() always returns a non-NULL value, since at the
 * very least every path will be contained in the global zone.
 *
 * As with the other zone_find_by_*() functions, the caller is
 * responsible for zone_rele()ing the return value of this function.
 */
zone_t *
zone_find_by_path(const char *path)
{
	zone_t *zone;
	zone_t *zret = NULL;
	zone_status_t status;

	if (path == NULL) {
		/*
		 * Call from rootconf().
		 */
		zone_hold(global_zone);
		return (global_zone);
	}
	ASSERT(*path == '/');
	mutex_enter(&zonehash_lock);
	/*
	 * Walk the whole active list; the *last* zone for which the path
	 * is visible wins, so a more deeply nested match overrides the
	 * global zone's trivial match.
	 */
	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone)) {
		if (ZONE_PATH_VISIBLE(path, zone))
			zret = zone;
	}
	ASSERT(zret != NULL);
	status = zone_status_get(zret);
	if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) {
		/*
		 * Zone practically doesn't exist.
		 */
		zret = global_zone;
	}
	zone_hold(zret);
	mutex_exit(&zonehash_lock);
	return (zret);
}
|
1779 |
||
1780 |
/* |
|
1781 |
* Get the number of cpus visible to this zone. The system-wide global |
|
1782 |
* 'ncpus' is returned if pools are disabled, the caller is in the |
|
1783 |
* global zone, or a NULL zone argument is passed in. |
|
1784 |
*/ |
|
1785 |
int |
|
1786 |
zone_ncpus_get(zone_t *zone) |
|
1787 |
{ |
|
1788 |
int myncpus = zone == NULL ? 0 : zone->zone_ncpus; |
|
1789 |
||
1790 |
return (myncpus != 0 ? myncpus : ncpus); |
|
1791 |
} |
|
1792 |
||
1793 |
/* |
|
1794 |
* Get the number of online cpus visible to this zone. The system-wide |
|
1795 |
* global 'ncpus_online' is returned if pools are disabled, the caller |
|
1796 |
* is in the global zone, or a NULL zone argument is passed in. |
|
1797 |
*/ |
|
1798 |
int |
|
1799 |
zone_ncpus_online_get(zone_t *zone) |
|
1800 |
{ |
|
1801 |
int myncpus_online = zone == NULL ? 0 : zone->zone_ncpus_online; |
|
1802 |
||
1803 |
return (myncpus_online != 0 ? myncpus_online : ncpus_online); |
|
1804 |
} |
|
1805 |
||
1806 |
/* |
|
1807 |
* Return the pool to which the zone is currently bound. |
|
1808 |
*/ |
|
1809 |
pool_t * |
|
1810 |
zone_pool_get(zone_t *zone) |
|
1811 |
{ |
|
1812 |
ASSERT(pool_lock_held()); |
|
1813 |
||
1814 |
return (zone->zone_pool); |
|
1815 |
} |
|
1816 |
||
1817 |
/*
 * Set the zone's pool pointer and update the zone's visibility to match
 * the resources in the new pool.
 *
 * Caller must hold both the pool lock and cpu_lock (asserted below);
 * cpu_lock is required by zone_pset_set().
 */
void
zone_pool_set(zone_t *zone, pool_t *pool)
{
	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	zone->zone_pool = pool;
	/* Rebind visibility to the new pool's processor set. */
	zone_pset_set(zone, pool->pool_pset->pset_id);
}
|
1830 |
||
1831 |
/* |
|
1832 |
* Return the cached value of the id of the processor set to which the |
|
1833 |
* zone is currently bound. The value will be ZONE_PS_INVAL if the pools |
|
1834 |
* facility is disabled. |
|
1835 |
*/ |
|
1836 |
psetid_t |
|
1837 |
zone_pset_get(zone_t *zone) |
|
1838 |
{ |
|
1839 |
ASSERT(MUTEX_HELD(&cpu_lock)); |
|
1840 |
||
1841 |
return (zone->zone_psetid); |
|
1842 |
} |
|
1843 |
||
1844 |
/*
 * Set the cached value of the id of the processor set to which the zone
 * is currently bound.  Also update the zone's visibility to match the
 * resources in the new processor set.  Caller must hold cpu_lock.
 */
void
zone_pset_set(zone_t *zone, psetid_t newpsetid)
{
	psetid_t oldpsetid;

	ASSERT(MUTEX_HELD(&cpu_lock));
	oldpsetid = zone_pset_get(zone);

	if (oldpsetid == newpsetid)
		return;		/* no rebinding needed */
	/*
	 * Global zone sees all.
	 */
	if (zone != global_zone) {
		zone->zone_psetid = newpsetid;
		/* Add visibility to the new pset before removing the old. */
		if (newpsetid != ZONE_PS_INVAL)
			pool_pset_visibility_add(newpsetid, zone);
		if (oldpsetid != ZONE_PS_INVAL)
			pool_pset_visibility_remove(oldpsetid, zone);
	}
	/*
	 * Disabling pools, so we should start using the global values
	 * for ncpus and ncpus_online.
	 */
	if (newpsetid == ZONE_PS_INVAL) {
		zone->zone_ncpus = 0;
		zone->zone_ncpus_online = 0;
	}
}
|
1878 |
||
1879 |
/* |
|
1880 |
* Walk the list of active zones and issue the provided callback for |
|
1881 |
* each of them. |
|
1882 |
* |
|
1883 |
* Caller must not be holding any locks that may be acquired under |
|
1884 |
* zonehash_lock. See comment at the beginning of the file for a list of |
|
1885 |
* common locks and their interactions with zones. |
|
1886 |
*/ |
|
1887 |
int |
|
1888 |
zone_walk(int (*cb)(zone_t *, void *), void *data) |
|
1889 |
{ |
|
1890 |
zone_t *zone; |
|
1891 |
int ret = 0; |
|
1892 |
zone_status_t status; |
|
1893 |
||
1894 |
mutex_enter(&zonehash_lock); |
|
1895 |
for (zone = list_head(&zone_active); zone != NULL; |
|
1896 |
zone = list_next(&zone_active, zone)) { |
|
1897 |
/* |
|
1898 |
* Skip zones that shouldn't be externally visible. |
|
1899 |
*/ |
|
1900 |
status = zone_status_get(zone); |
|
1901 |
if (status < ZONE_IS_READY || status > ZONE_IS_DOWN) |
|
1902 |
continue; |
|
1903 |
/* |
|
1904 |
* Bail immediately if any callback invocation returns a |
|
1905 |
* non-zero value. |
|
1906 |
*/ |
|
1907 |
ret = (*cb)(zone, data); |
|
1908 |
if (ret != 0) |
|
1909 |
break; |
|
1910 |
} |
|
1911 |
mutex_exit(&zonehash_lock); |
|
1912 |
return (ret); |
|
1913 |
} |
|
1914 |
||
1915 |
/*
 * Resolve the user-space path 'upath' and record the resulting held root
 * vnode, root path (always '/'-terminated) and path length in 'zone'.
 * Returns 0 on success or an errno value; the lookup is retried a bounded
 * number of times when it fails with ESTALE.
 */
static int
zone_set_root(zone_t *zone, const char *upath)
{
	vnode_t *vp;
	int trycount;
	int error = 0;
	char *path;
	struct pathname upn, pn;
	size_t pathlen;

	/* Copy the path in from userland. */
	if ((error = pn_get((char *)upath, UIO_USERSPACE, &upn)) != 0)
		return (error);

	pn_alloc(&pn);

	/* prevent infinite loop */
	trycount = 10;
	for (;;) {
		if (--trycount <= 0) {
			error = ESTALE;
			goto out;
		}

		if ((error = lookuppn(&upn, &pn, FOLLOW, NULLVPP, &vp)) == 0) {
			/*
			 * VOP_ACCESS() may cover 'vp' with a new
			 * filesystem, if 'vp' is an autoFS vnode.
			 * Get the new 'vp' if so.
			 */
			if ((error = VOP_ACCESS(vp, VEXEC, 0, CRED())) == 0 &&
			    (vp->v_vfsmountedhere == NULL ||
			    (error = traverse(&vp)) == 0)) {
				/*
				 * Copy the resolved path and append a
				 * trailing '/' (hence the "+ 2": one for
				 * the slash, one for the NUL).
				 */
				pathlen = pn.pn_pathlen + 2;
				path = kmem_alloc(pathlen, KM_SLEEP);
				(void) strncpy(path, pn.pn_path,
				    pn.pn_pathlen + 1);
				path[pathlen - 2] = '/';
				path[pathlen - 1] = '\0';
				pn_free(&pn);
				pn_free(&upn);

				/* Success! */
				break;
			}
			VN_RELE(vp);
		}
		if (error != ESTALE)
			goto out;
	}

	ASSERT(error == 0);
	zone->zone_rootvp = vp;		/* we hold a reference to vp */
	zone->zone_rootpath = path;
	zone->zone_rootpathlen = pathlen;
	/*
	 * A root ending in "/lu/" marks a scratch zone -- presumably a
	 * Live Upgrade alternate root (see ZF_IS_SCRATCH); confirm.
	 */
	if (pathlen > 5 && strcmp(path + pathlen - 5, "/lu/") == 0)
		zone->zone_flags |= ZF_IS_SCRATCH;
	return (0);

out:
	pn_free(&pn);
	pn_free(&upn);
	return (error);
}
|
1978 |
||
1979 |
/*
 * Local ASCII-only isalnum() replacement used by zone_set_name().
 * NOTE: evaluates 'c' more than once, so the argument must be free of
 * side effects.
 */
#define isalnum(c) (((c) >= '0' && (c) <= '9') || \
	((c) >= 'a' && (c) <= 'z') || \
	((c) >= 'A' && (c) <= 'Z'))
|
1982 |
||
1983 |
static int |
|
1984 |
zone_set_name(zone_t *zone, const char *uname) |
|
1985 |
{ |
|
1986 |
char *kname = kmem_zalloc(ZONENAME_MAX, KM_SLEEP); |
|
1987 |
size_t len; |
|
1988 |
int i, err; |
|
1989 |
||
1990 |
if ((err = copyinstr(uname, kname, ZONENAME_MAX, &len)) != 0) { |
|
1991 |
kmem_free(kname, ZONENAME_MAX); |
|
1992 |
return (err); /* EFAULT or ENAMETOOLONG */ |
|
1993 |
} |
|
1994 |
||
1995 |
/* must be less than ZONENAME_MAX */ |
|
1996 |
if (len == ZONENAME_MAX && kname[ZONENAME_MAX - 1] != '\0') { |
|
1997 |
kmem_free(kname, ZONENAME_MAX); |
|
1998 |
return (EINVAL); |
|
1999 |
} |
|
2000 |
||
2001 |
/* |
|
2002 |
* Name must start with an alphanumeric and must contain only |
|
2003 |
* alphanumerics, '-', '_' and '.'. |
|
2004 |
*/ |
|
2005 |
if (!isalnum(kname[0])) { |
|
2006 |
kmem_free(kname, ZONENAME_MAX); |
|
2007 |
return (EINVAL); |
|
2008 |
} |
|
2009 |
for (i = 1; i < len - 1; i++) { |
|
2010 |
if (!isalnum(kname[i]) && kname[i] != '-' && kname[i] != '_' && |
|
2011 |
kname[i] != '.') { |
|
2012 |
kmem_free(kname, ZONENAME_MAX); |
|
2013 |
return (EINVAL); |
|
2014 |
} |
|
2015 |
} |
|
2016 |
||
2017 |
zone->zone_name = kname; |
|
2018 |
return (0); |
|
2019 |
} |
|
2020 |
||
2021 |
/*
 * Similar to thread_create(), but makes sure the thread is in the appropriate
 * zone's zsched process (curproc->p_zone->zone_zsched) before returning.
 */
/*ARGSUSED*/
kthread_t *
zthread_create(
    caddr_t stk,
    size_t stksize,
    void (*proc)(),
    void *arg,
    size_t len,
    pri_t pri)
{
	kthread_t *t;
	zone_t *zone = curproc->p_zone;
	proc_t *pp = zone->zone_zsched;

	zone_hold(zone);	/* Reference to be dropped when thread exits */

	/*
	 * No-one should be trying to create threads if the zone is shutting
	 * down and there aren't any kernel threads around.  See comment
	 * in zthread_exit().
	 */
	ASSERT(!(zone->zone_kthreads == NULL &&
	    zone_status_get(zone) >= ZONE_IS_EMPTY));
	/*
	 * Create a thread, but don't let it run until we've finished setting
	 * things up.
	 */
	t = thread_create(stk, stksize, proc, arg, len, pp, TS_STOPPED, pri);
	ASSERT(t->t_forw == NULL);
	mutex_enter(&zone_status_lock);
	/*
	 * Link the new thread into the zone's circular doubly-linked list
	 * of kernel threads (t_forw/t_back), protected by zone_status_lock.
	 */
	if (zone->zone_kthreads == NULL) {
		t->t_forw = t->t_back = t;
	} else {
		kthread_t *tx = zone->zone_kthreads;

		t->t_forw = tx;
		t->t_back = tx->t_back;
		tx->t_back->t_forw = t;
		tx->t_back = t;
	}
	zone->zone_kthreads = t;
	mutex_exit(&zone_status_lock);

	mutex_enter(&pp->p_lock);
	t->t_proc_flag |= TP_ZTHREAD;
	/* Swap the creator's project reference for zsched's project. */
	project_rele(t->t_proj);
	t->t_proj = project_hold(pp->p_task->tk_proj);

	/*
	 * Setup complete, let it run.
	 */
	thread_lock(t);
	t->t_schedflag |= TS_ALLSTART;
	setrun_locked(t);
	thread_unlock(t);

	mutex_exit(&pp->p_lock);

	return (t);
}
|
2085 |
||
2086 |
/*
 * Similar to thread_exit().  Must be called by threads created via
 * zthread_create().
 */
void
zthread_exit(void)
{
	kthread_t *t = curthread;
	proc_t *pp = curproc;
	zone_t *zone = pp->p_zone;

	mutex_enter(&zone_status_lock);

	/*
	 * Reparent to p0
	 */
	kpreempt_disable();
	mutex_enter(&pp->p_lock);
	t->t_proc_flag &= ~TP_ZTHREAD;
	t->t_procp = &p0;
	hat_thread_exit(t);
	mutex_exit(&pp->p_lock);
	kpreempt_enable();

	if (t->t_back == t) {
		/* We are the last thread on the zone's circular list. */
		ASSERT(t->t_forw == t);
		/*
		 * If the zone is empty, once the thread count
		 * goes to zero no further kernel threads can be
		 * created. This is because if the creator is a process
		 * in the zone, then it must have exited before the zone
		 * state could be set to ZONE_IS_EMPTY.
		 * Otherwise, if the creator is a kernel thread in the
		 * zone, the thread count is non-zero.
		 *
		 * This really means that non-zone kernel threads should
		 * not create zone kernel threads.
		 */
		zone->zone_kthreads = NULL;
		if (zone_status_get(zone) == ZONE_IS_EMPTY) {
			zone_status_set(zone, ZONE_IS_DOWN);
		}
	} else {
		/* Unlink this thread from the zone's circular list. */
		t->t_forw->t_back = t->t_back;
		t->t_back->t_forw = t->t_forw;
		if (zone->zone_kthreads == t)
			zone->zone_kthreads = t->t_forw;
	}
	mutex_exit(&zone_status_lock);
	zone_rele(zone);	/* drops the hold taken in zthread_create() */
	thread_exit();
	/* NOTREACHED */
}
|
2139 |
||
2140 |
static void |
|
2141 |
zone_chdir(vnode_t *vp, vnode_t **vpp, proc_t *pp) |
|
2142 |
{ |
|
2143 |
vnode_t *oldvp; |
|
2144 |
||
2145 |
/* we're going to hold a reference here to the directory */ |
|
2146 |
VN_HOLD(vp); |
|
2147 |
||
2148 |
#ifdef C2_AUDIT |
|
2149 |
if (audit_active) /* update abs cwd/root path see c2audit.c */ |
|
2150 |
audit_chdirec(vp, vpp); |
|
2151 |
#endif |
|
2152 |
||
2153 |
mutex_enter(&pp->p_lock); |
|
2154 |
oldvp = *vpp; |
|
2155 |
*vpp = vp; |
|
2156 |
mutex_exit(&pp->p_lock); |
|
2157 |
if (oldvp != NULL) |
|
2158 |
VN_RELE(oldvp); |
|
2159 |
} |
|
2160 |
||
2161 |
/* |
|
2162 |
* Convert an rctl value represented by an nvlist_t into an rctl_val_t. |
|
2163 |
*/ |
|
2164 |
static int |
|
2165 |
nvlist2rctlval(nvlist_t *nvl, rctl_val_t *rv) |
|
2166 |
{ |
|
2167 |
nvpair_t *nvp = NULL; |
|
2168 |
boolean_t priv_set = B_FALSE; |
|
2169 |
boolean_t limit_set = B_FALSE; |
|
2170 |
boolean_t action_set = B_FALSE; |
|
2171 |
||
2172 |
while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { |
|
2173 |
const char *name; |
|
2174 |
uint64_t ui64; |
|
2175 |
||
2176 |
name = nvpair_name(nvp); |
|
2177 |
if (nvpair_type(nvp) != DATA_TYPE_UINT64) |
|
2178 |
return (EINVAL); |
|
2179 |
(void) nvpair_value_uint64(nvp, &ui64); |
|
2180 |
if (strcmp(name, "privilege") == 0) { |
|
2181 |
/* |
|
2182 |
* Currently only privileged values are allowed, but |
|
2183 |
* this may change in the future. |
|
2184 |
*/ |
|
2185 |
if (ui64 != RCPRIV_PRIVILEGED) |
|
2186 |
return (EINVAL); |
|
2187 |
rv->rcv_privilege = ui64; |
|
2188 |
priv_set = B_TRUE; |
|
2189 |
} else if (strcmp(name, "limit") == 0) { |
|
2190 |
rv->rcv_value = ui64; |
|
2191 |
limit_set = B_TRUE; |
|
2192 |
} else if (strcmp(name, "action") == 0) { |
|
2193 |
if (ui64 != RCTL_LOCAL_NOACTION && |
|
2194 |
ui64 != RCTL_LOCAL_DENY) |
|
2195 |
return (EINVAL); |
|
2196 |
rv->rcv_flagaction = ui64; |
|
2197 |
action_set = B_TRUE; |
|
2198 |
} else { |
|
2199 |
return (EINVAL); |
|
2200 |
} |
|
2201 |
} |
|
2202 |
||
2203 |
if (!(priv_set && limit_set && action_set)) |
|
2204 |
return (EINVAL); |
|
2205 |
rv->rcv_action_signal = 0; |
|
2206 |
rv->rcv_action_recipient = NULL; |
|
2207 |
rv->rcv_action_recip_pid = -1; |
|
2208 |
rv->rcv_firing_time = 0; |
|
2209 |
||
2210 |
return (0); |
|
2211 |
} |
|
2212 |
||
2267 | 2213 |
/*
 * Non-global zone version of start_init.  Runs as the first lwp of the
 * zone's init process; on failure it records the cause in zone_boot_err
 * and tears the process down instead of returning to userland.
 */
void
zone_start_init(void)
{
	proc_t *p = ttoproc(curthread);

	ASSERT(!INGLOBALZONE(curproc));

	/*
	 * We maintain zone_boot_err so that we can return the cause of the
	 * failure back to the caller of the zone_boot syscall.
	 */
	p->p_zone->zone_boot_err = start_init_common();

	mutex_enter(&zone_status_lock);
	if (p->p_zone->zone_boot_err != 0) {
		/*
		 * Make sure we are still in the booting state-- we could have
		 * raced and already be shutting down, or even further along.
		 */
		if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING)
			zone_status_set(p->p_zone, ZONE_IS_SHUTTING_DOWN);
		mutex_exit(&zone_status_lock);
		/* It's gone bad, dispose of the process */
		if (proc_exit(CLD_EXITED, p->p_zone->zone_boot_err) != 0) {
			/*
			 * proc_exit() failed: fall back to exiting just
			 * this lwp.
			 */
			mutex_enter(&p->p_lock);
			ASSERT(p->p_flag & SEXITLWPS);
			lwp_exit();
		}
	} else {
		if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING)
			zone_status_set(p->p_zone, ZONE_IS_RUNNING);
		mutex_exit(&zone_status_lock);
		/* cause the process to return to userland. */
		lwp_rtt();
	}
}
|
2252 |
||
2253 |
/*
 * Argument package handed to the zsched() kernel process at creation.
 */
struct zsched_arg {
	zone_t *zone;		/* zone this zsched instance serves */
	nvlist_t *nvlist;	/* rctl values to apply (see zsched()) */
};
|
2257 |
||
2258 |
/*
 * Per-zone "sched" workalike.  The similarity to "sched" doesn't have
 * anything to do with scheduling, but rather with the fact that
 * per-zone kernel threads are parented to zsched, just like regular
 * kernel threads are parented to sched (p0).
 *
 * zsched is also responsible for launching init for the zone.
 */
static void
zsched(void *arg)
{
	struct zsched_arg *za = arg;
	proc_t *pp = curproc;
	proc_t *initp = proc_init;
	zone_t *zone = za->zone;
	cred_t *cr, *oldcred;
	rctl_set_t *set;
	rctl_alloc_gp_t *gp;
	contract_t *ct = NULL;
	task_t *tk, *oldtk;
	rctl_entity_p_t e;
	kproject_t *pj;

	nvlist_t *nvl = za->nvlist;
	nvpair_t *nvp = NULL;

	/* Re-label the process and drop any inherited file descriptors. */
	bcopy("zsched", u.u_psargs, sizeof ("zsched"));
	bcopy("zsched", u.u_comm, sizeof ("zsched"));
	u.u_argc = 0;
	u.u_argv = NULL;
	u.u_envp = NULL;
	closeall(P_FINFO(pp));

	/*
	 * We are this zone's "zsched" process.  As the zone isn't generally
	 * visible yet we don't need to grab any locks before initializing its
	 * zone_proc pointer.
	 */
	zone_hold(zone);  /* this hold is released by zone_destroy() */
	zone->zone_zsched = pp;
	mutex_enter(&pp->p_lock);
	pp->p_zone = zone;
	mutex_exit(&pp->p_lock);

	/*
	 * Disassociate process from its 'parent'; parent ourselves to init
	 * (pid 1) and change other values as needed.
	 */
	sess_create();

	mutex_enter(&pidlock);
	proc_detach(pp);
	pp->p_ppid = 1;
	pp->p_flag |= SZONETOP;
	pp->p_ancpid = 1;
	pp->p_parent = initp;
	pp->p_psibling = NULL;
	if (initp->p_child)
		initp->p_child->p_psibling = pp;
	pp->p_sibling = initp->p_child;
	initp->p_child = pp;

	/* Decrement what newproc() incremented. */
	upcount_dec(crgetruid(CRED()), GLOBAL_ZONEID);
	/*
	 * Our credentials are about to become kcred-like, so we don't care
	 * about the caller's ruid.
	 */
	upcount_inc(crgetruid(kcred), zone->zone_id);
	mutex_exit(&pidlock);

	/*
	 * getting out of global zone, so decrement lwp counts
	 */
	pj = pp->p_task->tk_proj;
	mutex_enter(&global_zone->zone_nlwps_lock);
	pj->kpj_nlwps -= pp->p_lwpcnt;
	global_zone->zone_nlwps -= pp->p_lwpcnt;
	mutex_exit(&global_zone->zone_nlwps_lock);

	/*
	 * Create and join a new task in project '0' of this zone.
	 *
	 * We don't need to call holdlwps() since we know we're the only lwp in
	 * this process.
	 *
	 * task_join() returns with p_lock held.
	 */
	tk = task_create(0, zone);
	mutex_enter(&cpu_lock);
	oldtk = task_join(tk, 0);
	mutex_exit(&curproc->p_lock);
	mutex_exit(&cpu_lock);
	task_rele(oldtk);

	/*
	 * add lwp counts to zsched's zone, and increment project's task count
	 * due to the task created in the above tasksys_settaskid
	 */
	pj = pp->p_task->tk_proj;
	mutex_enter(&zone->zone_nlwps_lock);
	pj->kpj_nlwps += pp->p_lwpcnt;
	pj->kpj_ntasks += 1;
	zone->zone_nlwps += pp->p_lwpcnt;
	mutex_exit(&zone->zone_nlwps_lock);

	/*
	 * The process was created by a process in the global zone, hence the
	 * credentials are wrong.  We might as well have kcred-ish credentials.
	 */
	cr = zone->zone_kcred;
	crhold(cr);
	mutex_enter(&pp->p_crlock);
	oldcred = pp->p_cred;
	pp->p_cred = cr;
	mutex_exit(&pp->p_crlock);
	crfree(oldcred);

	/*
	 * Hold credentials again (for thread)
	 */
	crhold(cr);

	/*
	 * p_lwpcnt can't change since this is a kernel process.
	 */
	crset(pp, cr);

	/*
	 * Chroot
	 */
	zone_chdir(zone->zone_rootvp, &PTOU(pp)->u_cdir, pp);
	zone_chdir(zone->zone_rootvp, &PTOU(pp)->u_rdir, pp);

	/*
	 * Initialize zone's rctl set.
	 */
	set = rctl_set_create();
	gp = rctl_set_init_prealloc(RCENTITY_ZONE);
	mutex_enter(&pp->p_lock);
	e.rcep_p.zone = zone;
	e.rcep_t = RCENTITY_ZONE;
	zone->zone_rctls = rctl_set_init(RCENTITY_ZONE, pp, &e, set, gp);
	mutex_exit(&pp->p_lock);
	rctl_prealloc_destroy(gp);

	/*
	 * Apply the rctls passed in to zone_create().  This is basically a list
	 * assignment: all of the old values are removed and the new ones
	 * inserted.  That is, if an empty list is passed in, all values are
	 * removed.
	 */
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		rctl_dict_entry_t *rde;
		rctl_hndl_t hndl;
		char *name;
		nvlist_t **nvlarray;
		uint_t i, nelem;
		int error;	/* For ASSERT()s */

		name = nvpair_name(nvp);
		hndl = rctl_hndl_lookup(name);
		ASSERT(hndl != -1);
		rde = rctl_dict_lookup_hndl(hndl);
		ASSERT(rde != NULL);

		/* Delete all existing non-system values for this rctl. */
		for (; /* ever */; ) {
			rctl_val_t oval;

			mutex_enter(&pp->p_lock);
			error = rctl_local_get(hndl, NULL, &oval, pp);
			mutex_exit(&pp->p_lock);
			ASSERT(error == 0);	/* Can't fail for RCTL_FIRST */
			ASSERT(oval.rcv_privilege != RCPRIV_BASIC);
			if (oval.rcv_privilege == RCPRIV_SYSTEM)
				break;
			mutex_enter(&pp->p_lock);
			error = rctl_local_delete(hndl, &oval, pp);
			mutex_exit(&pp->p_lock);
			ASSERT(error == 0);
		}
		error = nvpair_value_nvlist_array(nvp, &nvlarray, &nelem);
		ASSERT(error == 0);
		for (i = 0; i < nelem; i++) {
			rctl_val_t *nvalp;

			nvalp = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
			error = nvlist2rctlval(nvlarray[i], nvalp);
			ASSERT(error == 0);
			/*
			 * rctl_local_insert can fail if the value being
			 * inserted is a duplicate; this is OK.
			 */
			mutex_enter(&pp->p_lock);
			if (rctl_local_insert(hndl, nvalp, pp) != 0)
				kmem_cache_free(rctl_val_cache, nvalp);
			mutex_exit(&pp->p_lock);
		}
	}
	/*
	 * Tell the world that we're done setting up.
	 *
	 * At this point we want to set the zone status to ZONE_IS_READY
	 * and atomically set the zone's processor set visibility.  Once
	 * we drop pool_lock() this zone will automatically get updated
	 * to reflect any future changes to the pools configuration.
	 */
	pool_lock();
	mutex_enter(&cpu_lock);
	mutex_enter(&zonehash_lock);
	zone_uniqid(zone);
	zone_zsd_configure(zone);
	if (pool_state == POOL_ENABLED)
		zone_pset_set(zone, pool_default->pool_pset->pset_id);
	mutex_enter(&zone_status_lock);
	ASSERT(zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
	zone_status_set(zone, ZONE_IS_READY);
	mutex_exit(&zone_status_lock);
	mutex_exit(&zonehash_lock);
	mutex_exit(&cpu_lock);
	pool_unlock();

	/*
	 * Once we see the zone transition to the ZONE_IS_BOOTING state,
	 * we launch init, and set the state to running.
	 */
	zone_status_wait_cpr(zone, ZONE_IS_BOOTING, "zsched");

	if (zone_status_get(zone) == ZONE_IS_BOOTING) {
		id_t cid;

		/*
		 * Ok, this is a little complicated.  We need to grab the
		 * zone's pool's scheduling class ID; note that by now, we
		 * are already bound to a pool if we need to be (zoneadmd
		 * will have done that to us while we're in the READY
		 * state).  *But* the scheduling class for the zone's 'init'
		 * must be explicitly passed to newproc, which doesn't
		 * respect pool bindings.
		 *
		 * We hold the pool_lock across the call to newproc() to
		 * close the obvious race: the pool's scheduling class
		 * could change before we manage to create the LWP with
		 * classid 'cid'.
		 */
		pool_lock();
		cid = pool_get_class(zone->zone_pool);
		if (cid == -1)
			cid = defaultcid;

		/*
		 * If this fails, zone_boot will ultimately fail.  The
		 * state of the zone will be set to SHUTTING_DOWN-- userland
		 * will have to tear down the zone, and fail, or try again.
		 */
		if ((zone->zone_boot_err = newproc(zone_start_init, NULL, cid,
		    minclsyspri - 1, &ct)) != 0) {
			mutex_enter(&zone_status_lock);
			zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
			mutex_exit(&zone_status_lock);
		}
		pool_unlock();
	}

	/*
	 * Wait for zone_destroy() to be called.  This is what we spend
	 * most of our life doing.
	 */
	zone_status_wait_cpr(zone, ZONE_IS_DYING, "zsched");

	if (ct)
		/*
		 * At this point the process contract should be empty.
		 * (Though if it isn't, it's not the end of the world.)
		 */
		VERIFY(contract_abandon(ct, curproc, B_TRUE) == 0);

	/*
	 * Allow kcred to be freed when all referring processes
	 * (including this one) go away.  We can't just do this in
	 * zone_free because we need to wait for the zone_cred_ref to
	 * drop to 0 before calling zone_free, and the existence of
	 * zone_kcred will prevent that.  Thus, we call crfree here to
	 * balance the crdup in zone_create.  The crhold calls earlier
	 * in zsched will be dropped when the thread and process exit.
	 */
	crfree(zone->zone_kcred);
	zone->zone_kcred = NULL;

	exit(CLD_EXITED, 0);
}
|
2549 |
||
2550 |
/* |
|
2551 |
* Helper function to determine if there are any submounts of the |
|
2552 |
* provided path. Used to make sure the zone doesn't "inherit" any |
|
2553 |
* mounts from before it is created. |
|
2554 |
*/ |
|
2555 |
static uint_t |
|
2556 |
zone_mount_count(const char *rootpath) |
|
2557 |
{ |
|
2558 |
vfs_t *vfsp; |
|
2559 |
uint_t count = 0; |
|
2560 |
size_t rootpathlen = strlen(rootpath); |
|
2561 |
||
2562 |
/* |
|
2563 |
* Holding zonehash_lock prevents race conditions with |
|
2564 |
* vfs_list_add()/vfs_list_remove() since we serialize with |
|
2565 |
* zone_find_by_path(). |
|
2566 |
*/ |
|
2567 |
ASSERT(MUTEX_HELD(&zonehash_lock)); |
|
2568 |
/* |
|
2569 |
* The rootpath must end with a '/' |
|
2570 |
*/ |
|
2571 |
ASSERT(rootpath[rootpathlen - 1] == '/'); |
|
2572 |
||
2573 |
/* |
|
2574 |
* This intentionally does not count the rootpath itself if that |
|
2575 |
* happens to be a mount point. |
|
2576 |
*/ |
|
2577 |
vfs_list_read_lock(); |
|
2578 |
vfsp = rootvfs; |
|
2579 |
do { |
|
2580 |
if (strncmp(rootpath, refstr_value(vfsp->vfs_mntpt), |
|
2581 |
rootpathlen) == 0) |
|
2582 |
count++; |
|
2583 |
vfsp = vfsp->vfs_next; |
|
2584 |
} while (vfsp != rootvfs); |
|
2585 |
vfs_list_unlock(); |
|
2586 |
return (count); |
|
2587 |
} |
|
2588 |
||
2589 |
/* |
|
2590 |
* Helper function to make sure that a zone created on 'rootpath' |
|
2591 |
* wouldn't end up containing other zones' rootpaths. |
|
2592 |
*/ |
|
2593 |
static boolean_t |
|
2594 |
zone_is_nested(const char *rootpath) |
|
2595 |
{ |
|
2596 |
zone_t *zone; |
|
2597 |
size_t rootpathlen = strlen(rootpath); |
|
2598 |
size_t len; |
|
2599 |
||
2600 |
ASSERT(MUTEX_HELD(&zonehash_lock)); |
|
2601 |
||
2602 |
for (zone = list_head(&zone_active); zone != NULL; |
|
2603 |
zone = list_next(&zone_active, zone)) { |
|
2604 |
if (zone == global_zone) |
|
2605 |
continue; |
|
2606 |
len = strlen(zone->zone_rootpath); |
|
2607 |
if (strncmp(rootpath, zone->zone_rootpath, |
|
2608 |
MIN(rootpathlen, len)) == 0) |
|
2609 |
return (B_TRUE); |
|
2610 |
} |
|
2611 |
return (B_FALSE); |
|
2612 |
} |
|
2613 |
||
2614 |
static int |
|
813 | 2615 |
zone_set_privset(zone_t *zone, const priv_set_t *zone_privs, |
2616 |
size_t zone_privssz) |
|
0 | 2617 |
{ |
2618 |
priv_set_t *privs = kmem_alloc(sizeof (priv_set_t), KM_SLEEP); |
|
2619 |
||
813 | 2620 |
if (zone_privssz < sizeof (priv_set_t)) |
2621 |
return (set_errno(ENOMEM)); |
|
2622 |
||
0 | 2623 |
if (copyin(zone_privs, privs, sizeof (priv_set_t))) { |
2624 |
kmem_free(privs, sizeof (priv_set_t)); |
|
2625 |
return (EFAULT); |
|
2626 |
} |
|
2627 |
||
2628 |
zone->zone_privset = privs; |
|
2629 |
return (0); |
|
2630 |
} |
|
2631 |
||
2632 |
/* |
|
2633 |
* We make creative use of nvlists to pass in rctls from userland. The list is |
|
2634 |
* a list of the following structures: |
|
2635 |
* |
|
2636 |
* (name = rctl_name, value = nvpair_list_array) |
|
2637 |
* |
|
2638 |
* Where each element of the nvpair_list_array is of the form: |
|
2639 |
* |
|
2640 |
* [(name = "privilege", value = RCPRIV_PRIVILEGED), |
|
2641 |
* (name = "limit", value = uint64_t), |
|
2642 |
* (name = "action", value = (RCTL_LOCAL_NOACTION || RCTL_LOCAL_DENY))] |
|
2643 |
*/ |
|
2644 |
/*
 * Unpack and validate a user-supplied packed nvlist of zone rctls.
 * On success, *nvlp receives the unpacked nvlist (caller must free it
 * with nvlist_free); on failure an errno is returned and *nvlp is NULL.
 * A zero-length buffer is accepted and yields *nvlp == NULL, error 0.
 */
static int
parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
{
	nvpair_t *nvp = NULL;
	nvlist_t *nvl = NULL;
	char *kbuf;
	int error;
	rctl_val_t rv;

	*nvlp = NULL;

	if (buflen == 0)
		return (0);

	/* buflen is user-controlled, so fail rather than sleep on it */
	if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL)
		return (ENOMEM);
	if (copyin(ubuf, kbuf, buflen)) {
		error = EFAULT;
		goto out;
	}
	if (nvlist_unpack(kbuf, buflen, &nvl, KM_SLEEP) != 0) {
		/*
		 * nvl may have been allocated/free'd, but the value set to
		 * non-NULL, so we reset it here.
		 */
		nvl = NULL;
		error = EINVAL;
		goto out;
	}
	/*
	 * Each pair must be named "zone.*" and carry an nvlist array of
	 * (privilege, limit, action) triples -- see the format comment
	 * above this function.
	 */
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		rctl_dict_entry_t *rde;
		rctl_hndl_t hndl;
		nvlist_t **nvlarray;
		uint_t i, nelem;
		char *name;

		error = EINVAL;
		name = nvpair_name(nvp);
		if (strncmp(nvpair_name(nvp), "zone.", sizeof ("zone.") - 1)
		    != 0 || nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
			goto out;
		}
		/* the rctl must be registered with the kernel dictionary */
		if ((hndl = rctl_hndl_lookup(name)) == -1) {
			goto out;
		}
		rde = rctl_dict_lookup_hndl(hndl);
		/* type was checked above, so this conversion cannot fail */
		error = nvpair_value_nvlist_array(nvp, &nvlarray, &nelem);
		ASSERT(error == 0);
		for (i = 0; i < nelem; i++) {
			/* assignment-in-condition: nonzero means bad triple */
			if (error = nvlist2rctlval(nvlarray[i], &rv))
				goto out;
		}
		/*
		 * NOTE(review): rv holds only the LAST element of the
		 * array here, so rctl_invalid_value() validates just that
		 * one entry -- confirm whether each element should be
		 * checked inside the loop instead.
		 */
		if (rctl_invalid_value(rde, &rv)) {
			error = EINVAL;
			goto out;
		}
	}
	error = 0;
	*nvlp = nvl;
out:
	kmem_free(kbuf, buflen);
	if (error && nvl != NULL)
		nvlist_free(nvl);
	return (error);
}
|
2709 |
||
2710 |
int |
|
2711 |
zone_create_error(int er_error, int er_ext, int *er_out) { |
|
2712 |
if (er_out != NULL) { |
|
2713 |
if (copyout(&er_ext, er_out, sizeof (int))) { |
|
2714 |
return (set_errno(EFAULT)); |
|
2715 |
} |
|
2716 |
} |
|
2717 |
return (set_errno(er_error)); |
|
2718 |
} |
|
2719 |
||
1676 | 2720 |
static int |
2721 |
zone_set_label(zone_t *zone, const bslabel_t *lab, uint32_t doi) |
|
2722 |
{ |
|
2723 |
ts_label_t *tsl; |
|
2724 |
bslabel_t blab; |
|
2725 |
||
2726 |
/* Get label from user */ |
|
2727 |
if (copyin(lab, &blab, sizeof (blab)) != 0) |
|
2728 |
return (EFAULT); |
|
2729 |
tsl = labelalloc(&blab, doi, KM_NOSLEEP); |
|
2730 |
if (tsl == NULL) |
|
2731 |
return (ENOMEM); |
|
2732 |
||
2733 |
zone->zone_slabel = tsl; |
|
2734 |
return (0); |
|
2735 |
} |
|
2736 |
||
0 | 2737 |
/* |
789 | 2738 |
* Parses a comma-separated list of ZFS datasets into a per-zone dictionary. |
2739 |
*/ |
|
2740 |
static int
parse_zfs(zone_t *zone, caddr_t ubuf, size_t buflen)
{
	char *kbuf;
	char *dataset, *next;
	zone_dataset_t *zd;
	size_t len;

	/* An empty list is valid: the zone simply gets no datasets. */
	if (ubuf == NULL || buflen == 0)
		return (0);

	/* buflen is user-controlled, so fail rather than sleep on it */
	if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	if (copyin(ubuf, kbuf, buflen) != 0) {
		kmem_free(kbuf, buflen);
		return (EFAULT);
	}

	/*
	 * Split the buffer on commas, inserting one zone_dataset_t per
	 * name.  The entries become owned by zone->zone_datasets and are
	 * freed in zone_free().
	 *
	 * NOTE(review): strchr/strlen assume the user included a NUL
	 * terminator within buflen bytes -- confirm the userland caller
	 * (zone_create) always passes strlen+1.
	 */
	dataset = next = kbuf;
	for (;;) {
		zd = kmem_alloc(sizeof (zone_dataset_t), KM_SLEEP);

		next = strchr(dataset, ',');

		/* last (or only) name runs to the terminating NUL */
		if (next == NULL)
			len = strlen(dataset);
		else
			len = next - dataset;

		zd->zd_dataset = kmem_alloc(len + 1, KM_SLEEP);
		bcopy(dataset, zd->zd_dataset, len);
		zd->zd_dataset[len] = '\0';

		list_insert_head(&zone->zone_datasets, zd);

		if (next == NULL)
			break;

		dataset = next + 1;
	}

	kmem_free(kbuf, buflen);
	return (0);
}
|
2785 |
||
2786 |
/* |
|
0 | 2787 |
* System call to create/initialize a new zone named 'zone_name', rooted |
2788 |
* at 'zone_root', with a zone-wide privilege limit set of 'zone_privs', |
|
1676 | 2789 |
* and initialized with the zone-wide rctls described in 'rctlbuf', and |
2790 |
* with labeling set by 'match', 'doi', and 'label'. |
|
0 | 2791 |
* |
2792 |
* If extended error is non-null, we may use it to return more detailed |
|
2793 |
* error information. |
|
2794 |
*/ |
|
2795 |
static zoneid_t
zone_create(const char *zone_name, const char *zone_root,
    const priv_set_t *zone_privs, size_t zone_privssz,
    caddr_t rctlbuf, size_t rctlbufsz,
    caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
    int match, uint32_t doi, const bslabel_t *label)
{
	struct zsched_arg zarg;
	nvlist_t *rctls = NULL;
	proc_t *pp = curproc;
	zone_t *zone, *ztmp;
	zoneid_t zoneid;
	int error;
	int error2 = 0;
	char *str;
	cred_t *zkcr;
	boolean_t insert_label_hash;

	if (secpolicy_zone_config(CRED()) != 0)
		return (set_errno(EPERM));

	/* can't boot zone from within chroot environment */
	if (PTOU(pp)->u_rdir != NULL && PTOU(pp)->u_rdir != rootdir)
		return (zone_create_error(ENOTSUP, ZE_CHROOTED,
		    extended_error));

	/*
	 * Allocate and initialize the zone_t.  Until the zone is hashed
	 * (below) errors are handled by zone_free() + return; after the
	 * kcred is set up, errors must go through the errout label.
	 */
	zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
	zoneid = zone->zone_id = id_alloc(zoneid_space);
	zone->zone_status = ZONE_IS_UNINITIALIZED;
	zone->zone_pool = pool_default;
	zone->zone_pool_mod = gethrtime();
	zone->zone_psetid = ZONE_PS_INVAL;
	zone->zone_ncpus = 0;
	zone->zone_ncpus_online = 0;
	mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
	list_create(&zone->zone_zsd, sizeof (struct zsd_entry),
	    offsetof(struct zsd_entry, zsd_linkage));
	list_create(&zone->zone_datasets, sizeof (zone_dataset_t),
	    offsetof(zone_dataset_t, zd_linkage));
	rw_init(&zone->zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);

	if ((error = zone_set_name(zone, zone_name)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}

	if ((error = zone_set_root(zone, zone_root)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}
	if ((error = zone_set_privset(zone, zone_privs, zone_privssz)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}

	/* initialize node name to be the same as zone name */
	zone->zone_nodename = kmem_alloc(_SYS_NMLN, KM_SLEEP);
	(void) strncpy(zone->zone_nodename, zone->zone_name, _SYS_NMLN);
	zone->zone_nodename[_SYS_NMLN - 1] = '\0';

	zone->zone_domain = kmem_alloc(_SYS_NMLN, KM_SLEEP);
	zone->zone_domain[0] = '\0';
	zone->zone_shares = 1;
	zone->zone_bootargs = NULL;
	/* the boot-time init program defaults to zone_default_initname */
	zone->zone_initname =
	    kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP);
	(void) strcpy(zone->zone_initname, zone_default_initname);

	/*
	 * Zsched initializes the rctls.
	 */
	zone->zone_rctls = NULL;

	if ((error = parse_rctls(rctlbuf, rctlbufsz, &rctls)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}

	if ((error = parse_zfs(zone, zfsbuf, zfsbufsz)) != 0) {
		zone_free(zone);
		return (set_errno(error));
	}

	/*
	 * Read in the trusted system parameters:
	 * match flag and sensitivity label.
	 */
	zone->zone_match = match;
	if (is_system_labeled() && !(zone->zone_flags & ZF_IS_SCRATCH)) {
		error = zone_set_label(zone, label, doi);
		if (error != 0) {
			zone_free(zone);
			return (set_errno(error));
		}
		insert_label_hash = B_TRUE;
	} else {
		/* all zones get an admin_low label if system is not labeled */
		zone->zone_slabel = l_admin_low;
		label_hold(l_admin_low);
		insert_label_hash = B_FALSE;
	}

	/*
	 * Stop all lwps since that's what normally happens as part of fork().
	 * This needs to happen before we grab any locks to avoid deadlock
	 * (another lwp in the process could be waiting for the held lock).
	 */
	if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK)) {
		zone_free(zone);
		if (rctls)
			nvlist_free(rctls);
		/*
		 * NOTE(review): 'error' is 0 here (set by the last
		 * successful call above), so userland sees errno 0 --
		 * confirm whether EINTR was intended on this path.
		 */
		return (zone_create_error(error, 0, extended_error));
	}

	if (block_mounts() == 0) {
		mutex_enter(&pp->p_lock);
		if (curthread != pp->p_agenttp)
			continuelwps(pp);
		mutex_exit(&pp->p_lock);
		zone_free(zone);
		if (rctls)
			nvlist_free(rctls);
		/* NOTE(review): same stale-'error' concern as above */
		return (zone_create_error(error, 0, extended_error));
	}

	/*
	 * Set up credential for kernel access.  After this, any errors
	 * should go through the dance in errout rather than calling
	 * zone_free directly.
	 */
	zone->zone_kcred = crdup(kcred);
	crsetzone(zone->zone_kcred, zone);
	priv_intersect(zone->zone_privset, &CR_PPRIV(zone->zone_kcred));
	priv_intersect(zone->zone_privset, &CR_EPRIV(zone->zone_kcred));
	priv_intersect(zone->zone_privset, &CR_IPRIV(zone->zone_kcred));
	priv_intersect(zone->zone_privset, &CR_LPRIV(zone->zone_kcred));

	mutex_enter(&zonehash_lock);
	/*
	 * Make sure zone doesn't already exist.
	 *
	 * If the system and zone are labeled,
	 * make sure no other zone exists that has the same label.
	 */
	if ((ztmp = zone_find_all_by_name(zone->zone_name)) != NULL ||
	    (insert_label_hash &&
	    (ztmp = zone_find_all_by_label(zone->zone_slabel)) != NULL)) {
		zone_status_t status;

		status = zone_status_get(ztmp);
		if (status == ZONE_IS_READY || status == ZONE_IS_RUNNING)
			error = EEXIST;
		else
			error = EBUSY;
		goto errout;
	}

	/*
	 * Don't allow zone creations which would cause one zone's rootpath to
	 * be accessible from that of another (non-global) zone.
	 */
	if (zone_is_nested(zone->zone_rootpath)) {
		error = EBUSY;
		goto errout;
	}

	ASSERT(zonecount != 0);		/* check for leaks */
	if (zonecount + 1 > maxzones) {
		error = ENOMEM;
		goto errout;
	}

	if (zone_mount_count(zone->zone_rootpath) != 0) {
		error = EBUSY;
		error2 = ZE_AREMOUNTS;
		goto errout;
	}

	/*
	 * Zone is still incomplete, but we need to drop all locks while
	 * zsched() initializes this zone's kernel process.  We
	 * optimistically add the zone to the hashtable and associated
	 * lists so a parallel zone_create() doesn't try to create the
	 * same zone.
	 */
	zonecount++;
	(void) mod_hash_insert(zonehashbyid,
	    (mod_hash_key_t)(uintptr_t)zone->zone_id,
	    (mod_hash_val_t)(uintptr_t)zone);
	str = kmem_alloc(strlen(zone->zone_name) + 1, KM_SLEEP);
	(void) strcpy(str, zone->zone_name);
	(void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)str,
	    (mod_hash_val_t)(uintptr_t)zone);
	if (insert_label_hash) {
		(void) mod_hash_insert(zonehashbylabel,
		    (mod_hash_key_t)zone->zone_slabel, (mod_hash_val_t)zone);
		/* remember so the label hash entry is removed on failure */
		zone->zone_flags |= ZF_HASHED_LABEL;
	}

	/*
	 * Insert into active list.  At this point there are no 'hold's
	 * on the zone, but everyone else knows not to use it, so we can
	 * continue to use it.  zsched() will do a zone_hold() if the
	 * newproc() is successful.
	 */
	list_insert_tail(&zone_active, zone);
	mutex_exit(&zonehash_lock);

	zarg.zone = zone;
	zarg.nvlist = rctls;
	/*
	 * The process, task, and project rctls are probably wrong;
	 * we need an interface to get the default values of all rctls,
	 * and initialize zsched appropriately.  I'm not sure that that
	 * makes much of a difference, though.
	 */
	if (error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL)) {
		/*
		 * We need to undo all globally visible state.
		 */
		mutex_enter(&zonehash_lock);
		list_remove(&zone_active, zone);
		if (zone->zone_flags & ZF_HASHED_LABEL) {
			ASSERT(zone->zone_slabel != NULL);
			(void) mod_hash_destroy(zonehashbylabel,
			    (mod_hash_key_t)zone->zone_slabel);
		}
		(void) mod_hash_destroy(zonehashbyname,
		    (mod_hash_key_t)(uintptr_t)zone->zone_name);
		(void) mod_hash_destroy(zonehashbyid,
		    (mod_hash_key_t)(uintptr_t)zone->zone_id);
		ASSERT(zonecount > 1);
		zonecount--;
		goto errout;
	}

	/*
	 * Zone creation can't fail from now on.
	 */

	/*
	 * Let the other lwps continue.
	 */
	mutex_enter(&pp->p_lock);
	if (curthread != pp->p_agenttp)
		continuelwps(pp);
	mutex_exit(&pp->p_lock);

	/*
	 * Wait for zsched to finish initializing the zone.
	 */
	zone_status_wait(zone, ZONE_IS_READY);
	/*
	 * The zone is fully visible, so we can let mounts progress.
	 */
	resume_mounts();
	if (rctls)
		nvlist_free(rctls);

	return (zoneid);

errout:
	mutex_exit(&zonehash_lock);
	/*
	 * Let the other lwps continue.
	 */
	mutex_enter(&pp->p_lock);
	if (curthread != pp->p_agenttp)
		continuelwps(pp);
	mutex_exit(&pp->p_lock);

	resume_mounts();
	if (rctls)
		nvlist_free(rctls);
	/*
	 * There is currently one reference to the zone, a cred_ref from
	 * zone_kcred.  To free the zone, we call crfree, which will call
	 * zone_cred_rele, which will call zone_free.
	 */
	ASSERT(zone->zone_cred_ref == 1);	/* for zone_kcred */
	ASSERT(zone->zone_kcred->cr_ref == 1);
	ASSERT(zone->zone_ref == 0);
	zkcr = zone->zone_kcred;
	zone->zone_kcred = NULL;
	crfree(zkcr);				/* triggers call to zone_free */
	return (zone_create_error(error, error2, extended_error));
}
|
3084 |
||
3085 |
/* |
|
3086 |
* Cause the zone to boot. This is pretty simple, since we let zoneadmd do |
|
2267 | 3087 |
 * the heavy lifting.  The program launched at the "top" of the zone is
 * the zone's configured init (zone_initname); when none was configured,
 * the system default stored at zone_default_initname is used.
|
0 | 3090 |
*/ |
3091 |
static int
zone_boot(zoneid_t zoneid)
{
	int err;
	zone_t *zone;

	/* only a sufficiently privileged caller may boot a zone */
	if (secpolicy_zone_config(CRED()) != 0)
		return (set_errno(EPERM));
	/* the global zone and out-of-range ids cannot be booted */
	if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
		return (set_errno(EINVAL));

	mutex_enter(&zonehash_lock);
	/*
	 * Look for zone under hash lock to prevent races with calls to
	 * zone_shutdown, zone_destroy, etc.
	 */
	if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}

	/* only a READY zone may transition to BOOTING */
	mutex_enter(&zone_status_lock);
	if (zone_status_get(zone) != ZONE_IS_READY) {
		mutex_exit(&zone_status_lock);
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}
	zone_status_set(zone, ZONE_IS_BOOTING);
	mutex_exit(&zone_status_lock);

	zone_hold(zone);	/* so we can use the zone_t later */
	mutex_exit(&zonehash_lock);

	/* block (interruptibly) until zsched has started init */
	if (zone_status_wait_sig(zone, ZONE_IS_RUNNING) == 0) {
		zone_rele(zone);
		return (set_errno(EINTR));
	}

	/*
	 * Boot (starting init) might have failed, in which case the zone
	 * will go to the SHUTTING_DOWN state; an appropriate errno will
	 * be placed in zone->zone_boot_err, and so we return that.
	 */
	err = zone->zone_boot_err;
	zone_rele(zone);
	return (err ? set_errno(err) : 0);
}
|
3138 |
||
3139 |
/* |
|
3140 |
* Kills all user processes in the zone, waiting for them all to exit |
|
3141 |
* before returning. |
|
3142 |
*/ |
|
3143 |
static int |
|
3144 |
zone_empty(zone_t *zone) |
|
3145 |
{ |
|
3146 |
int waitstatus; |
|
3147 |
||
3148 |
/* |
|
3149 |
* We need to drop zonehash_lock before killing all |
|
3150 |
* processes, otherwise we'll deadlock with zone_find_* |
|
3151 |
* which can be called from the exit path. |
|
3152 |
*/ |
|
3153 |
ASSERT(MUTEX_NOT_HELD(&zonehash_lock)); |
|
3154 |
while ((waitstatus = zone_status_timedwait_sig(zone, lbolt + hz, |
|
3155 |
ZONE_IS_EMPTY)) == -1) { |
|
3156 |
killall(zone->zone_id); |
|
3157 |
} |
|
3158 |
/* |
|
3159 |
* return EINTR if we were signaled |
|
3160 |
*/ |
|
3161 |
if (waitstatus == 0) |
|
3162 |
return (EINTR); |
|
3163 |
return (0); |
|
3164 |
} |
|
3165 |
||
3166 |
/* |
|
1676 | 3167 |
* This function implements the policy for zone visibility. |
3168 |
* |
|
3169 |
* In standard Solaris, a non-global zone can only see itself. |
|
3170 |
* |
|
3171 |
* In Trusted Extensions, a labeled zone can lookup any zone whose label |
|
3172 |
* it dominates. For this test, the label of the global zone is treated as |
|
3173 |
* admin_high so it is special-cased instead of being checked for dominance. |
|
3174 |
* |
|
3175 |
* Returns true if zone attributes are viewable, false otherwise. |
|
3176 |
*/ |
|
3177 |
static boolean_t |
|
3178 |
zone_list_access(zone_t *zone) |
|
3179 |
{ |
|
3180 |
||
3181 |
if (curproc->p_zone == global_zone || |
|
3182 |
curproc->p_zone == zone) { |
|
3183 |
return (B_TRUE); |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
3184 |
} else if (is_system_labeled() && !(zone->zone_flags & ZF_IS_SCRATCH)) { |
1676 | 3185 |
bslabel_t *curproc_label; |
3186 |
bslabel_t *zone_label; |
|
3187 |
||
3188 |
curproc_label = label2bslabel(curproc->p_zone->zone_slabel); |
|
3189 |
zone_label = label2bslabel(zone->zone_slabel); |
|
3190 |
||
3191 |
if (zone->zone_id != GLOBAL_ZONEID && |
|
3192 |
bldominates(curproc_label, zone_label)) { |
|
3193 |
return (B_TRUE); |
|
3194 |
} else { |
|
3195 |
return (B_FALSE); |
|
3196 |
} |
|
3197 |
} else { |
|
3198 |
return (B_FALSE); |
|
3199 |
} |
|
3200 |
} |
|
3201 |
||
3202 |
/* |
|
0 | 3203 |
* Systemcall to start the zone's halt sequence. By the time this |
3204 |
* function successfully returns, all user processes and kernel threads |
|
3205 |
* executing in it will have exited, ZSD shutdown callbacks executed, |
|
3206 |
* and the zone status set to ZONE_IS_DOWN. |
|
3207 |
* |
|
3208 |
* It is possible that the call will interrupt itself if the caller is the |
|
3209 |
* parent of any process running in the zone, and doesn't have SIGCHLD blocked. |
|
3210 |
*/ |
|
3211 |
static int
zone_shutdown(zoneid_t zoneid)
{
	int error;
	zone_t *zone;
	zone_status_t status;

	if (secpolicy_zone_config(CRED()) != 0)
		return (set_errno(EPERM));
	if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
		return (set_errno(EINVAL));

	/*
	 * Block mounts so that VFS_MOUNT() can get an accurate view of
	 * the zone's status with regards to ZONE_IS_SHUTTING down.
	 *
	 * e.g. NFS can fail the mount if it determines that the zone
	 * has already begun the shutdown sequence.
	 */
	if (block_mounts() == 0)
		return (set_errno(EINTR));
	mutex_enter(&zonehash_lock);
	/*
	 * Look for zone under hash lock to prevent races with other
	 * calls to zone_shutdown and zone_destroy.
	 */
	if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
		mutex_exit(&zonehash_lock);
		resume_mounts();
		return (set_errno(EINVAL));
	}
	mutex_enter(&zone_status_lock);
	status = zone_status_get(zone);
	/*
	 * Fail if the zone isn't fully initialized yet.
	 */
	if (status < ZONE_IS_READY) {
		mutex_exit(&zone_status_lock);
		mutex_exit(&zonehash_lock);
		resume_mounts();
		return (set_errno(EINVAL));
	}
	/*
	 * If conditions required for zone_shutdown() to return have been met,
	 * return success.
	 */
	if (status >= ZONE_IS_DOWN) {
		mutex_exit(&zone_status_lock);
		mutex_exit(&zonehash_lock);
		resume_mounts();
		return (0);
	}
	/*
	 * If zone_shutdown() hasn't been called before, go through the motions.
	 * If it has, there's nothing to do but wait for the kernel threads to
	 * drain.
	 */
	if (status < ZONE_IS_EMPTY) {
		uint_t ntasks;

		mutex_enter(&zone->zone_lock);
		if ((ntasks = zone->zone_ntasks) != 1) {
			/*
			 * There's still stuff running.
			 */
			zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
		}
		mutex_exit(&zone->zone_lock);
		if (ntasks == 1) {
			/*
			 * The only way to create another task is through
			 * zone_enter(), which will block until we drop
			 * zonehash_lock.  The zone is empty.
			 */
			if (zone->zone_kthreads == NULL) {
				/*
				 * Skip ahead to ZONE_IS_DOWN
				 */
				zone_status_set(zone, ZONE_IS_DOWN);
			} else {
				zone_status_set(zone, ZONE_IS_EMPTY);
			}
		}
	}
	zone_hold(zone);	/* so we can use the zone_t later */
	mutex_exit(&zone_status_lock);
	mutex_exit(&zonehash_lock);
	resume_mounts();

	/* kill and reap every user process; EINTR if we were signaled */
	if (error = zone_empty(zone)) {
		zone_rele(zone);
		return (set_errno(error));
	}
	/*
	 * After the zone status goes to ZONE_IS_DOWN this zone will no
	 * longer be notified of changes to the pools configuration, so
	 * in order to not end up with a stale pool pointer, we point
	 * ourselves at the default pool and remove all resource
	 * visibility.  This is especially important as the zone_t may
	 * languish on the deathrow for a very long time waiting for
	 * cred's to drain out.
	 *
	 * This rebinding of the zone can happen multiple times
	 * (presumably due to interrupted or parallel systemcalls)
	 * without any adverse effects.
	 */
	if (pool_lock_intr() != 0) {
		zone_rele(zone);
		return (set_errno(EINTR));
	}
	if (pool_state == POOL_ENABLED) {
		mutex_enter(&cpu_lock);
		zone_pool_set(zone, pool_default);
		/*
		 * The zone no longer needs to be able to see any cpus.
		 */
		zone_pset_set(zone, ZONE_PS_INVAL);
		mutex_exit(&cpu_lock);
	}
	pool_unlock();

	/*
	 * ZSD shutdown callbacks can be executed multiple times, hence
	 * it is safe to not be holding any locks across this call.
	 */
	zone_zsd_callbacks(zone, ZSD_SHUTDOWN);

	mutex_enter(&zone_status_lock);
	if (zone->zone_kthreads == NULL && zone_status_get(zone) < ZONE_IS_DOWN)
		zone_status_set(zone, ZONE_IS_DOWN);
	mutex_exit(&zone_status_lock);

	/*
	 * Wait for kernel threads to drain.
	 */
	if (!zone_status_wait_sig(zone, ZONE_IS_DOWN)) {
		zone_rele(zone);
		return (set_errno(EINTR));
	}
	zone_rele(zone);
	return (0);
}
|
3353 |
||
3354 |
/* |
|
3355 |
* Systemcall entry point to finalize the zone halt process. The caller |
|
3356 |
 * must have already successfully called zone_shutdown(). |
|
3357 |
* |
|
3358 |
* Upon successful completion, the zone will have been fully destroyed: |
|
3359 |
* zsched will have exited, destructor callbacks executed, and the zone |
|
3360 |
* removed from the list of active zones. |
|
3361 |
*/ |
|
3362 |
static int |
|
3363 |
zone_destroy(zoneid_t zoneid) |
|
3364 |
{ |
|
3365 |
uint64_t uniqid; |
|
3366 |
zone_t *zone; |
|
3367 |
zone_status_t status; |
|
3368 |
||
3369 |
if (secpolicy_zone_config(CRED()) != 0) |
|
3370 |
return (set_errno(EPERM)); |
|
3371 |
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID) |
|
3372 |
return (set_errno(EINVAL)); |
|
3373 |
||
3374 |
mutex_enter(&zonehash_lock); |
|
3375 |
/* |
|
3376 |
* Look for zone under hash lock to prevent races with other |
|
3377 |
* calls to zone_destroy. |
|
3378 |
*/ |
|
3379 |
if ((zone = zone_find_all_by_id(zoneid)) == NULL) { |
|
3380 |
mutex_exit(&zonehash_lock); |
|
3381 |
return (set_errno(EINVAL)); |
|
3382 |
} |
|
3383 |
||
3384 |
if (zone_mount_count(zone->zone_rootpath) != 0) { |
|
3385 |
mutex_exit(&zonehash_lock); |
|
3386 |
return (set_errno(EBUSY)); |
|
3387 |
} |
|
3388 |
mutex_enter(&zone_status_lock); |
|
3389 |
status = zone_status_get(zone); |
|
3390 |
if (status < ZONE_IS_DOWN) { |
|
3391 |
mutex_exit(&zone_status_lock); |
|
3392 |
mutex_exit(&zonehash_lock); |
|
3393 |
return (set_errno(EBUSY)); |
|
3394 |
} else if (status == ZONE_IS_DOWN) { |
|
3395 |
zone_status_set(zone, ZONE_IS_DYING); /* Tell zsched to exit */ |
|
3396 |
} |
|
3397 |
mutex_exit(&zone_status_lock); |
|
3398 |
zone_hold(zone); |
|
3399 |
mutex_exit(&zonehash_lock); |
|
3400 |
||
3401 |
/* |
|
3402 |
* wait for zsched to exit |
|
3403 |
*/ |
|
3404 |
zone_status_wait(zone, ZONE_IS_DEAD); |
|
3405 |
zone_zsd_callbacks(zone, ZSD_DESTROY); |
|
3406 |
uniqid = zone->zone_uniqid; |
|
3407 |
zone_rele(zone); |
|
3408 |
zone = NULL; /* potentially free'd */ |
|
3409 |
||
3410 |
mutex_enter(&zonehash_lock); |
|
3411 |
for (; /* ever */; ) { |
|
3412 |
boolean_t unref; |
|
3413 |
||
3414 |
if ((zone = zone_find_all_by_id(zoneid)) == NULL || |
|
3415 |
zone->zone_uniqid != uniqid) { |
|
3416 |
/* |
|
3417 |
* The zone has gone away. Necessary conditions |
|
3418 |
* are met, so we return success. |
|
3419 |
*/ |
|
3420 |
mutex_exit(&zonehash_lock); |
|
3421 |
return (0); |
|
3422 |
} |
|
3423 |
mutex_enter(&zone->zone_lock); |
|
3424 |
unref = ZONE_IS_UNREF(zone); |
|
3425 |
mutex_exit(&zone->zone_lock); |
|
3426 |
if (unref) { |
|
3427 |
/* |
|
3428 |
* There is only one reference to the zone -- that |
|
3429 |
* added when the zone was added to the hashtables -- |
|
3430 |
* and things will remain this way until we drop |
|
3431 |
* zonehash_lock... we can go ahead and cleanup the |
|
3432 |
* zone. |
|
3433 |
*/ |
|
3434 |
break; |
|
3435 |
} |
|
3436 |
||
3437 |
if (cv_wait_sig(&zone_destroy_cv, &zonehash_lock) == 0) { |
|
3438 |
/* Signaled */ |
|
3439 |
mutex_exit(&zonehash_lock); |
|
3440 |
return (set_errno(EINTR)); |
|
3441 |
} |
|
3442 |
||
3443 |
} |
|
3444 |
||
3445 |
/* |
|
3446 |
* It is now safe to let the zone be recreated; remove it from the |
|
3447 |
* lists. The memory will not be freed until the last cred |
|
3448 |
* reference goes away. |
|
3449 |
*/ |
|
3450 |
ASSERT(zonecount > 1); /* must be > 1; can't destroy global zone */ |
|
3451 |
zonecount--; |
|
3452 |
/* remove from active list and hash tables */ |
|
3453 |
list_remove(&zone_active, zone); |
|
3454 |
(void) mod_hash_destroy(zonehashbyname, |
|
3455 |
(mod_hash_key_t)zone->zone_name); |
|
3456 |
(void) mod_hash_destroy(zonehashbyid, |
|
3457 |
(mod_hash_key_t)(uintptr_t)zone->zone_id); |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
3458 |
if (zone->zone_flags & ZF_HASHED_LABEL) |
1676 | 3459 |
(void) mod_hash_destroy(zonehashbylabel, |
3460 |
(mod_hash_key_t)zone->zone_slabel); |
|
0 | 3461 |
mutex_exit(&zonehash_lock); |
3462 |
||
766 | 3463 |
/* |
3464 |
* Release the root vnode; we're not using it anymore. Nor should any |
|
3465 |
* other thread that might access it exist. |
|
3466 |
*/ |
|
3467 |
if (zone->zone_rootvp != NULL) { |
|
3468 |
VN_RELE(zone->zone_rootvp); |
|
3469 |
zone->zone_rootvp = NULL; |
|
3470 |
} |
|
3471 |
||
0 | 3472 |
/* add to deathrow list */ |
3473 |
mutex_enter(&zone_deathrow_lock); |
|
3474 |
list_insert_tail(&zone_deathrow, zone); |
|
3475 |
mutex_exit(&zone_deathrow_lock); |
|
3476 |
||
3477 |
/* |
|
3478 |
* Drop last reference (which was added by zsched()), this will |
|
3479 |
* free the zone unless there are outstanding cred references. |
|
3480 |
*/ |
|
3481 |
zone_rele(zone); |
|
3482 |
return (0); |
|
3483 |
} |
|
3484 |
||
3485 |
/* |
|
3486 |
* Systemcall entry point for zone_getattr(2). |
|
3487 |
*/ |
|
3488 |
static ssize_t |
|
3489 |
zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) |
|
3490 |
{ |
|
3491 |
size_t size; |
|
3492 |
int error = 0, err; |
|
3493 |
zone_t *zone; |
|
3494 |
char *zonepath; |
|
2267 | 3495 |
char *outstr; |
0 | 3496 |
zone_status_t zone_status; |
3497 |
pid_t initpid; |
|
3498 |
boolean_t global = (curproc->p_zone == global_zone); |
|
1676 | 3499 |
boolean_t curzone = (curproc->p_zone->zone_id == zoneid); |
0 | 3500 |
|
3501 |
mutex_enter(&zonehash_lock); |
|
3502 |
if ((zone = zone_find_all_by_id(zoneid)) == NULL) { |
|
3503 |
mutex_exit(&zonehash_lock); |
|
3504 |
return (set_errno(EINVAL)); |
|
3505 |
} |
|
3506 |
zone_status = zone_status_get(zone); |
|
3507 |
if (zone_status < ZONE_IS_READY) { |
|
3508 |
mutex_exit(&zonehash_lock); |
|
3509 |
return (set_errno(EINVAL)); |
|
3510 |
} |
|
3511 |
zone_hold(zone); |
|
3512 |
mutex_exit(&zonehash_lock); |
|
3513 |
||
3514 |
/* |
|
1676 | 3515 |
* If not in the global zone, don't show information about other zones, |
3516 |
* unless the system is labeled and the local zone's label dominates |
|
3517 |
* the other zone. |
|
0 | 3518 |
*/ |
1676 | 3519 |
if (!zone_list_access(zone)) { |
0 | 3520 |
zone_rele(zone); |
3521 |
return (set_errno(EINVAL)); |
|
3522 |
} |
|
3523 |
||
3524 |
switch (attr) { |
|
3525 |
case ZONE_ATTR_ROOT: |
|
3526 |
if (global) { |
|
3527 |
/* |
|
3528 |
* Copy the path to trim the trailing "/" (except for |
|
3529 |
* the global zone). |
|
3530 |
*/ |
|
3531 |
if (zone != global_zone) |
|
3532 |
size = zone->zone_rootpathlen - 1; |
|
3533 |
else |
|
3534 |
size = zone->zone_rootpathlen; |
|
3535 |
zonepath = kmem_alloc(size, KM_SLEEP); |
|
3536 |
bcopy(zone->zone_rootpath, zonepath, size); |
|
3537 |
zonepath[size - 1] = '\0'; |
|
3538 |
} else { |
|
1676 | 3539 |
if (curzone || !is_system_labeled()) { |
3540 |
/* |
|
3541 |
* Caller is not in the global zone. |
|
3542 |
* if the query is on the current zone |
|
3543 |
* or the system is not labeled, |
|
3544 |
* just return faked-up path for current zone. |
|
3545 |
*/ |
|
3546 |
zonepath = "/"; |
|
3547 |
size = 2; |
|
3548 |
} else { |
|
3549 |
/* |
|
3550 |
* Return related path for current zone. |
|
3551 |
*/ |
|
3552 |
int prefix_len = strlen(zone_prefix); |
|
3553 |
int zname_len = strlen(zone->zone_name); |
|
3554 |
||
3555 |
size = prefix_len + zname_len + 1; |
|
3556 |
zonepath = kmem_alloc(size, KM_SLEEP); |
|
3557 |
bcopy(zone_prefix, zonepath, prefix_len); |
|
3558 |
bcopy(zone->zone_name, zonepath + |
|
2267 | 3559 |
prefix_len, zname_len); |
1676 | 3560 |
zonepath[size - 1] = '\0'; |
3561 |
} |
|
0 | 3562 |
} |
3563 |
if (bufsize > size) |
|
3564 |
bufsize = size; |
|
3565 |
if (buf != NULL) { |
|
3566 |
err = copyoutstr(zonepath, buf, bufsize, NULL); |
|
3567 |
if (err != 0 && err != ENAMETOOLONG) |
|
3568 |
error = EFAULT; |
|
3569 |
} |
|
1676 | 3570 |
if (global || (is_system_labeled() && !curzone)) |
0 | 3571 |
kmem_free(zonepath, size); |
3572 |
break; |
|
3573 |
||
3574 |
case ZONE_ATTR_NAME: |
|
3575 |
size = strlen(zone->zone_name) + 1; |
|
3576 |
if (bufsize > size) |
|
3577 |
bufsize = size; |
|
3578 |
if (buf != NULL) { |
|
3579 |
err = copyoutstr(zone->zone_name, buf, bufsize, NULL); |
|
3580 |
if (err != 0 && err != ENAMETOOLONG) |
|
3581 |
error = EFAULT; |
|
3582 |
} |
|
3583 |
break; |
|
3584 |
||
3585 |
case ZONE_ATTR_STATUS: |
|
3586 |
/* |
|
3587 |
* Since we're not holding zonehash_lock, the zone status |
|
3588 |
* may be anything; leave it up to userland to sort it out. |
|
3589 |
*/ |
|
3590 |
size = sizeof (zone_status); |
|
3591 |
if (bufsize > size) |
|
3592 |
bufsize = size; |
|
3593 |
zone_status = zone_status_get(zone); |
|
3594 |
if (buf != NULL && |
|
3595 |
copyout(&zone_status, buf, bufsize) != 0) |
|
3596 |
error = EFAULT; |
|
3597 |
break; |
|
3598 |
case ZONE_ATTR_PRIVSET: |
|
3599 |
size = sizeof (priv_set_t); |
|
3600 |
if (bufsize > size) |
|
3601 |
bufsize = size; |
|
3602 |
if (buf != NULL && |
|
3603 |
copyout(zone->zone_privset, buf, bufsize) != 0) |
|
3604 |
error = EFAULT; |
|
3605 |
break; |
|
3606 |
case ZONE_ATTR_UNIQID: |
|
3607 |
size = sizeof (zone->zone_uniqid); |
|
3608 |
if (bufsize > size) |
|
3609 |
bufsize = size; |
|
3610 |
if (buf != NULL && |
|
3611 |
copyout(&zone->zone_uniqid, buf, bufsize) != 0) |
|
3612 |
error = EFAULT; |
|
3613 |
break; |
|
3614 |
case ZONE_ATTR_POOLID: |
|
3615 |
{ |
|
3616 |
pool_t *pool; |
|
3617 |
poolid_t poolid; |
|
3618 |
||
3619 |
if (pool_lock_intr() != 0) { |
|
3620 |
error = EINTR; |
|
3621 |
break; |
|
3622 |
} |
|
3623 |
pool = zone_pool_get(zone); |
|
3624 |
poolid = pool->pool_id; |
|
3625 |
pool_unlock(); |
|
3626 |
size = sizeof (poolid); |
|
3627 |
if (bufsize > size) |
|
3628 |
bufsize = size; |
|
3629 |
if (buf != NULL && copyout(&poolid, buf, size) != 0) |
|
3630 |
error = EFAULT; |
|
3631 |
} |
|
3632 |
break; |
|
1676 | 3633 |
case ZONE_ATTR_SLBL: |
3634 |
size = sizeof (bslabel_t); |
|
3635 |
if (bufsize > size) |
|
3636 |
bufsize = size; |
|
3637 |
if (zone->zone_slabel == NULL) |
|
3638 |
error = EINVAL; |
|
3639 |
else if (buf != NULL && |
|
3640 |
copyout(label2bslabel(zone->zone_slabel), buf, |
|
3641 |
bufsize) != 0) |
|
3642 |
error = EFAULT; |
|
3643 |
break; |
|
0 | 3644 |
case ZONE_ATTR_INITPID: |
3645 |
size = sizeof (initpid); |
|
3646 |
if (bufsize > size) |
|
3647 |
bufsize = size; |
|
3648 |
initpid = zone->zone_proc_initpid; |
|
3649 |
if (initpid == -1) { |
|
3650 |
error = ESRCH; |
|
3651 |
break; |
|
3652 |
} |
|
3653 |
if (buf != NULL && |
|
3654 |
copyout(&initpid, buf, bufsize) != 0) |
|
3655 |
error = EFAULT; |
|
3656 |
break; |
|
2267 | 3657 |
case ZONE_ATTR_INITNAME: |
3658 |
size = strlen(zone->zone_initname) + 1; |
|
3659 |
if (bufsize > size) |
|
3660 |
bufsize = size; |
|
3661 |
if (buf != NULL) { |
|
3662 |
err = copyoutstr(zone->zone_initname, buf, bufsize, |
|
3663 |
NULL); |
|
3664 |
if (err != 0 && err != ENAMETOOLONG) |
|
3665 |
error = EFAULT; |
|
3666 |
} |
|
3667 |
break; |
|
3668 |
case ZONE_ATTR_BOOTARGS: |
|
3669 |
if (zone->zone_bootargs == NULL) |
|
3670 |
outstr = ""; |
|
3671 |
else |
|
3672 |
outstr = zone->zone_bootargs; |
|
3673 |
size = strlen(outstr) + 1; |
|
3674 |
if (bufsize > size) |
|
3675 |
bufsize = size; |
|
3676 |
if (buf != NULL) { |
|
3677 |
err = copyoutstr(outstr, buf, bufsize, NULL); |
|
3678 |
if (err != 0 && err != ENAMETOOLONG) |
|
3679 |
error = EFAULT; |
|
3680 |
} |
|
3681 |
break; |
|
0 | 3682 |
default: |
3683 |
error = EINVAL; |
|
3684 |
} |
|
3685 |
zone_rele(zone); |
|
3686 |
||
3687 |
if (error) |
|
3688 |
return (set_errno(error)); |
|
3689 |
return ((ssize_t)size); |
|
3690 |
} |
|
3691 |
||
3692 |
/* |
|
2267 | 3693 |
* Systemcall entry point for zone_setattr(2). |
3694 |
*/ |
|
3695 |
/*ARGSUSED*/ |
|
3696 |
static int |
|
3697 |
zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) |
|
3698 |
{ |
|
3699 |
zone_t *zone; |
|
3700 |
zone_status_t zone_status; |
|
3701 |
int err; |
|
3702 |
||
3703 |
if (secpolicy_zone_config(CRED()) != 0) |
|
3704 |
return (set_errno(EPERM)); |
|
3705 |
||
3706 |
/* |
|
3707 |
* At present, attributes can only be set on non-running, |
|
3708 |
* non-global zones. |
|
3709 |
*/ |
|
3710 |
if (zoneid == GLOBAL_ZONEID) { |
|
3711 |
return (set_errno(EINVAL)); |
|
3712 |
} |
|
3713 |
||
3714 |
mutex_enter(&zonehash_lock); |
|
3715 |
if ((zone = zone_find_all_by_id(zoneid)) == NULL) { |
|
3716 |
mutex_exit(&zonehash_lock); |
|
3717 |
return (set_errno(EINVAL)); |
|
3718 |
} |
|
3719 |
zone_hold(zone); |
|
3720 |
mutex_exit(&zonehash_lock); |
|
3721 |
||
3722 |
zone_status = zone_status_get(zone); |
|
3723 |
if (zone_status > ZONE_IS_READY) |
|
3724 |
goto done; |
|
3725 |
||
3726 |
switch (attr) { |
|
3727 |
case ZONE_ATTR_INITNAME: |
|
3728 |
err = zone_set_initname(zone, (const char *)buf); |
|
3729 |
break; |
|
3730 |
case ZONE_ATTR_BOOTARGS: |
|
3731 |
err = zone_set_bootargs(zone, (const char *)buf); |
|
3732 |
break; |
|
3733 |
default: |
|
3734 |
err = EINVAL; |
|
3735 |
} |
|
3736 |
||
3737 |
done: |
|
3738 |
zone_rele(zone); |
|
3739 |
return (err != 0 ? set_errno(err) : 0); |
|
3740 |
} |
|
3741 |
||
3742 |
/* |
|
0 | 3743 |
* Return zero if the process has at least one vnode mapped in to its |
3744 |
* address space which shouldn't be allowed to change zones. |
|
3745 |
*/ |
|
3746 |
static int |
|
3747 |
as_can_change_zones(void) |
|
3748 |
{ |
|
3749 |
proc_t *pp = curproc; |
|
3750 |
struct seg *seg; |
|
3751 |
struct as *as = pp->p_as; |
|
3752 |
vnode_t *vp; |
|
3753 |
int allow = 1; |
|
3754 |
||
3755 |
ASSERT(pp->p_as != &kas); |
|
3756 |
AS_LOCK_ENTER(&as, &as->a_lock, RW_READER); |
|
3757 |
for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { |
|
3758 |
/* |
|
3759 |
* if we can't get a backing vnode for this segment then skip |
|
3760 |
* it. |
|
3761 |
*/ |
|
3762 |
vp = NULL; |
|
3763 |
if (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL) |
|
3764 |
continue; |
|
3765 |
if (!vn_can_change_zones(vp)) { /* bail on first match */ |
|
3766 |
allow = 0; |
|
3767 |
break; |
|
3768 |
} |
|
3769 |
} |
|
3770 |
AS_LOCK_EXIT(&as, &as->a_lock); |
|
3771 |
return (allow); |
|
3772 |
} |
|
3773 |
||
3774 |
/* |
|
3775 |
* Systemcall entry point for zone_enter(). |
|
3776 |
* |
|
3777 |
* The current process is injected into said zone. In the process |
|
3778 |
* it will change its project membership, privileges, rootdir/cwd, |
|
3779 |
* zone-wide rctls, and pool association to match those of the zone. |
|
3780 |
* |
|
3781 |
* The first zone_enter() called while the zone is in the ZONE_IS_READY |
|
3782 |
* state will transition it to ZONE_IS_RUNNING. Processes may only |
|
3783 |
* enter a zone that is "ready" or "running". |
|
3784 |
*/ |
|
3785 |
static int |
|
3786 |
zone_enter(zoneid_t zoneid) |
|
3787 |
{ |
|
3788 |
zone_t *zone; |
|
3789 |
vnode_t *vp; |
|
3790 |
proc_t *pp = curproc; |
|
3791 |
contract_t *ct; |
|
3792 |
cont_process_t *ctp; |
|
3793 |
task_t *tk, *oldtk; |
|
3794 |
kproject_t *zone_proj0; |
|
3795 |
cred_t *cr, *newcr; |
|
3796 |
pool_t *oldpool, *newpool; |
|
3797 |
sess_t *sp; |
|
3798 |
uid_t uid; |
|
3799 |
zone_status_t status; |
|
3800 |
int err = 0; |
|
3801 |
rctl_entity_p_t e; |
|
3802 |
||
3803 |
if (secpolicy_zone_config(CRED()) != 0) |
|
3804 |
return (set_errno(EPERM)); |
|
3805 |
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID) |
|
3806 |
return (set_errno(EINVAL)); |
|
3807 |
||
3808 |
/* |
|
3809 |
* Stop all lwps so we don't need to hold a lock to look at |
|
3810 |
* curproc->p_zone. This needs to happen before we grab any |
|
3811 |
* locks to avoid deadlock (another lwp in the process could |
|
3812 |
* be waiting for the held lock). |
|
3813 |
*/ |
|
3814 |
if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK)) |
|
3815 |
return (set_errno(EINTR)); |
|
3816 |
||
3817 |
/* |
|
3818 |
* Make sure we're not changing zones with files open or mapped in |
|
3819 |
* to our address space which shouldn't be changing zones. |
|
3820 |
*/ |
|
3821 |
if (!files_can_change_zones()) { |
|
3822 |
err = EBADF; |
|
3823 |
goto out; |
|
3824 |
} |
|
3825 |
if (!as_can_change_zones()) { |
|
3826 |
err = EFAULT; |
|
3827 |
goto out; |
|
3828 |
} |
|
3829 |
||
3830 |
mutex_enter(&zonehash_lock); |
|
3831 |
if (pp->p_zone != global_zone) { |
|
3832 |
mutex_exit(&zonehash_lock); |
|
3833 |
err = EINVAL; |
|
3834 |
goto out; |
|
3835 |
} |
|
3836 |
||
3837 |
zone = zone_find_all_by_id(zoneid); |
|
3838 |
if (zone == NULL) { |
|
3839 |
mutex_exit(&zonehash_lock); |
|
3840 |
err = EINVAL; |
|
3841 |
goto out; |
|
3842 |
} |
|
3843 |
||
3844 |
/* |
|
3845 |
* To prevent processes in a zone from holding contracts on |
|
3846 |
* extrazonal resources, and to avoid process contract |
|
3847 |
* memberships which span zones, contract holders and processes |
|
3848 |
* which aren't the sole members of their encapsulating process |
|
3849 |
* contracts are not allowed to zone_enter. |
|
3850 |
*/ |
|
3851 |
ctp = pp->p_ct_process; |
|
3852 |
ct = &ctp->conp_contract; |
|
3853 |
mutex_enter(&ct->ct_lock); |
|
3854 |
mutex_enter(&pp->p_lock); |
|
3855 |
if ((avl_numnodes(&pp->p_ct_held) != 0) || (ctp->conp_nmembers != 1)) { |
|
3856 |
mutex_exit(&pp->p_lock); |
|
3857 |
mutex_exit(&ct->ct_lock); |
|
3858 |
mutex_exit(&zonehash_lock); |
|
3859 |
pool_unlock(); |
|
3860 |
err = EINVAL; |
|
3861 |
goto out; |
|
3862 |
} |
|
3863 |
||
3864 |
/* |
|
3865 |
* Moreover, we don't allow processes whose encapsulating |
|
3866 |
* process contracts have inherited extrazonal contracts. |
|
3867 |
* While it would be easier to eliminate all process contracts |
|
3868 |
* with inherited contracts, we need to be able to give a |
|
3869 |
* restarted init (or other zone-penetrating process) its |
|
3870 |
* predecessor's contracts. |
|
3871 |
*/ |
|
3872 |
if (ctp->conp_ninherited != 0) { |
|
3873 |
contract_t *next; |
|
3874 |
for (next = list_head(&ctp->conp_inherited); next; |
|
3875 |
next = list_next(&ctp->conp_inherited, next)) { |
|
3876 |
if (contract_getzuniqid(next) != zone->zone_uniqid) { |
|
3877 |
mutex_exit(&pp->p_lock); |
|
3878 |
mutex_exit(&ct->ct_lock); |
|
3879 |
mutex_exit(&zonehash_lock); |
|
3880 |
pool_unlock(); |
|
3881 |
err = EINVAL; |
|
3882 |
goto out; |
|
3883 |
} |
|
3884 |
} |
|
3885 |
} |
|
3886 |
mutex_exit(&pp->p_lock); |
|
3887 |
mutex_exit(&ct->ct_lock); |
|
3888 |
||
3889 |
status = zone_status_get(zone); |
|
3890 |
if (status < ZONE_IS_READY || status >= ZONE_IS_SHUTTING_DOWN) { |
|
3891 |
/* |
|
3892 |
* Can't join |
|
3893 |
*/ |
|
3894 |
mutex_exit(&zonehash_lock); |
|
3895 |
err = EINVAL; |
|
3896 |
goto out; |
|
3897 |
} |
|
3898 |
||
3899 |
/* |
|
3900 |
* Make sure new priv set is within the permitted set for caller |
|
3901 |
*/ |
|
3902 |
if (!priv_issubset(zone->zone_privset, &CR_OPPRIV(CRED()))) { |
|
3903 |
mutex_exit(&zonehash_lock); |
|
3904 |
err = EPERM; |
|
3905 |
goto out; |
|
3906 |
} |
|
3907 |
/* |
|
3908 |
* We want to momentarily drop zonehash_lock while we optimistically |
|
3909 |
* bind curproc to the pool it should be running in. This is safe |
|
3910 |
* since the zone can't disappear (we have a hold on it). |
|
3911 |
*/ |
|
3912 |
zone_hold(zone); |
|
3913 |
mutex_exit(&zonehash_lock); |
|
3914 |
||
3915 |
/* |
|
3916 |
* Grab pool_lock to keep the pools configuration from changing |
|
3917 |
* and to stop ourselves from getting rebound to another pool |
|
3918 |
* until we join the zone. |
|
3919 |
*/ |
|
3920 |
if (pool_lock_intr() != 0) { |
|
3921 |
zone_rele(zone); |
|
3922 |
err = EINTR; |
|
3923 |
goto out; |
|
3924 |
} |
|
3925 |
ASSERT(secpolicy_pool(CRED()) == 0); |
|
3926 |
/* |
|
3927 |
* Bind ourselves to the pool currently associated with the zone. |
|
3928 |
*/ |
|
3929 |
oldpool = curproc->p_pool; |
|
3930 |
newpool = zone_pool_get(zone); |
|
3931 |
if (pool_state == POOL_ENABLED && newpool != oldpool && |
|
3932 |
(err = pool_do_bind(newpool, P_PID, P_MYID, |
|
3933 |
POOL_BIND_ALL)) != 0) { |
|
3934 |
pool_unlock(); |
|
3935 |
zone_rele(zone); |
|
3936 |
goto out; |
|
3937 |
} |
|
3938 |
||
3939 |
/* |
|
3940 |
* Grab cpu_lock now; we'll need it later when we call |
|
3941 |
* task_join(). |
|
3942 |
*/ |
|
3943 |
mutex_enter(&cpu_lock); |
|
3944 |
mutex_enter(&zonehash_lock); |
|
3945 |
/* |
|
3946 |
* Make sure the zone hasn't moved on since we dropped zonehash_lock. |
|
3947 |
*/ |
|
3948 |
if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) { |
|
3949 |
/* |
|
3950 |
* Can't join anymore. |
|
3951 |
*/ |
|
3952 |
mutex_exit(&zonehash_lock); |
|
3953 |
mutex_exit(&cpu_lock); |
|
3954 |
if (pool_state == POOL_ENABLED && |
|
3955 |
newpool != oldpool) |
|
3956 |
(void) pool_do_bind(oldpool, P_PID, P_MYID, |
|
3957 |
POOL_BIND_ALL); |
|
3958 |
pool_unlock(); |
|
3959 |
zone_rele(zone); |
|
3960 |
err = EINVAL; |
|
3961 |
goto out; |
|
3962 |
} |
|
3963 |
||
3964 |
mutex_enter(&pp->p_lock); |
|
3965 |
zone_proj0 = zone->zone_zsched->p_task->tk_proj; |
|
3966 |
/* verify that we do not exceed and task or lwp limits */ |
|
3967 |
mutex_enter(&zone->zone_nlwps_lock); |
|
3968 |
/* add new lwps to zone and zone's proj0 */ |
|
3969 |
zone_proj0->kpj_nlwps += pp->p_lwpcnt; |
|
3970 |
zone->zone_nlwps += pp->p_lwpcnt; |
|
3971 |
/* add 1 task to zone's proj0 */ |
|
3972 |
zone_proj0->kpj_ntasks += 1; |
|
3973 |
mutex_exit(&pp->p_lock); |
|
3974 |
mutex_exit(&zone->zone_nlwps_lock); |
|
3975 |
||
3976 |
/* remove lwps from proc's old zone and old project */ |
|
3977 |
mutex_enter(&pp->p_zone->zone_nlwps_lock); |
|
3978 |
pp->p_zone->zone_nlwps -= pp->p_lwpcnt; |
|
3979 |
pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt; |
|
3980 |
mutex_exit(&pp->p_zone->zone_nlwps_lock); |
|
3981 |
||
3982 |
/* |
|
3983 |
* Joining the zone cannot fail from now on. |
|
3984 |
* |
|
3985 |
* This means that a lot of the following code can be commonized and |
|
3986 |
* shared with zsched(). |
|
3987 |
*/ |
|
3988 |
||
3989 |
/* |
|
3990 |
* Reset the encapsulating process contract's zone. |
|
3991 |
*/ |
|
3992 |
ASSERT(ct->ct_mzuniqid == GLOBAL_ZONEUNIQID); |
|
3993 |
contract_setzuniqid(ct, zone->zone_uniqid); |
|
3994 |
||
3995 |
/* |
|
3996 |
* Create a new task and associate the process with the project keyed |
|
3997 |
* by (projid,zoneid). |
|
3998 |
* |
|
3999 |
* We might as well be in project 0; the global zone's projid doesn't |
|
4000 |
* make much sense in a zone anyhow. |
|
4001 |
* |
|
4002 |
* This also increments zone_ntasks, and returns with p_lock held. |
|
4003 |
*/ |
|
4004 |
tk = task_create(0, zone); |
|
4005 |
oldtk = task_join(tk, 0); |
|
4006 |
mutex_exit(&cpu_lock); |
|
4007 |
||
4008 |
pp->p_flag |= SZONETOP; |
|
4009 |
pp->p_zone = zone; |
|
4010 |
||
4011 |
/* |
|
4012 |
* call RCTLOP_SET functions on this proc |
|
4013 |
*/ |
|
4014 |
e.rcep_p.zone = zone; |
|
4015 |
e.rcep_t = RCENTITY_ZONE; |
|
4016 |
(void) rctl_set_dup(NULL, NULL, pp, &e, zone->zone_rctls, NULL, |
|
4017 |
RCD_CALLBACK); |
|
4018 |
mutex_exit(&pp->p_lock); |
|
4019 |
||
4020 |
/* |
|
4021 |
* We don't need to hold any of zsched's locks here; not only do we know |
|
4022 |
* the process and zone aren't going away, we know its session isn't |
|
4023 |
* changing either. |
|
4024 |
* |
|
4025 |
* By joining zsched's session here, we mimic the behavior in the |
|
4026 |
* global zone of init's sid being the pid of sched. We extend this |
|
4027 |
* to all zlogin-like zone_enter()'ing processes as well. |
|
4028 |
*/ |
|
4029 |
mutex_enter(&pidlock); |
|
4030 |
sp = zone->zone_zsched->p_sessp; |
|
4031 |
SESS_HOLD(sp); |
|
4032 |
mutex_enter(&pp->p_lock); |
|
4033 |
pgexit(pp); |
|
4034 |
SESS_RELE(pp->p_sessp); |
|
4035 |
pp->p_sessp = sp; |
|
4036 |
pgjoin(pp, zone->zone_zsched->p_pidp); |
|
4037 |
mutex_exit(&pp->p_lock); |
|
4038 |
mutex_exit(&pidlock); |
|
4039 |
||
4040 |
mutex_exit(&zonehash_lock); |
|
4041 |
/* |
|
4042 |
* We're firmly in the zone; let pools progress. |
|
4043 |
*/ |
|
4044 |
pool_unlock(); |
|
4045 |
task_rele(oldtk); |
|
4046 |
/* |
|
4047 |
* We don't need to retain a hold on the zone since we already |
|
4048 |
* incremented zone_ntasks, so the zone isn't going anywhere. |
|
4049 |
*/ |
|
4050 |
zone_rele(zone); |
|
4051 |
||
4052 |
/* |
|
4053 |
* Chroot |
|
4054 |
*/ |
|
4055 |
vp = zone->zone_rootvp; |
|
4056 |
zone_chdir(vp, &PTOU(pp)->u_cdir, pp); |
|
4057 |
zone_chdir(vp, &PTOU(pp)->u_rdir, pp); |
|
4058 |
||
4059 |
/* |
|
4060 |
* Change process credentials |
|
4061 |
*/ |
|
4062 |
newcr = cralloc(); |
|
4063 |
mutex_enter(&pp->p_crlock); |
|
4064 |
cr = pp->p_cred; |
|
4065 |
crcopy_to(cr, newcr); |
|
4066 |
crsetzone(newcr, zone); |
|
4067 |
pp->p_cred = newcr; |
|
4068 |
||
4069 |
/* |
|
4070 |
* Restrict all process privilege sets to zone limit |
|
4071 |
*/ |
|
4072 |
priv_intersect(zone->zone_privset, &CR_PPRIV(newcr)); |
|
4073 |
priv_intersect(zone->zone_privset, &CR_EPRIV(newcr)); |
|
4074 |
priv_intersect(zone->zone_privset, &CR_IPRIV(newcr)); |
|
4075 |
priv_intersect(zone->zone_privset, &CR_LPRIV(newcr)); |
|
4076 |
mutex_exit(&pp->p_crlock); |
|
4077 |
crset(pp, newcr); |
|
4078 |
||
4079 |
/* |
|
4080 |
* Adjust upcount to reflect zone entry. |
|
4081 |
*/ |
|
4082 |
uid = crgetruid(newcr); |
|
4083 |
mutex_enter(&pidlock); |
|
4084 |
upcount_dec(uid, GLOBAL_ZONEID); |
|
4085 |
upcount_inc(uid, zoneid); |
|
4086 |
mutex_exit(&pidlock); |
|
4087 |
||
4088 |
/* |
|
4089 |
* Set up core file path and content. |
|
4090 |
*/ |
|
4091 |
set_core_defaults(); |
|
4092 |
||
4093 |
out: |
|
4094 |
/* |
|
4095 |
* Let the other lwps continue. |
|
4096 |
*/ |
|
4097 |
mutex_enter(&pp->p_lock); |
|
4098 |
if (curthread != pp->p_agenttp) |
|
4099 |
continuelwps(pp); |
|
4100 |
mutex_exit(&pp->p_lock); |
|
4101 |
||
4102 |
return (err != 0 ? set_errno(err) : 0); |
|
4103 |
} |
|
4104 |
||
4105 |
/* |
|
4106 |
* Systemcall entry point for zone_list(2). |
|
4107 |
* |
|
4108 |
* Processes running in a (non-global) zone only see themselves. |
|
1676 | 4109 |
* On labeled systems, they see all zones whose label they dominate. |
0 | 4110 |
*/ |
4111 |
static int |
|
4112 |
zone_list(zoneid_t *zoneidlist, uint_t *numzones) |
|
4113 |
{ |
|
4114 |
zoneid_t *zoneids; |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4115 |
zone_t *zone, *myzone; |
0 | 4116 |
uint_t user_nzones, real_nzones; |
1676 | 4117 |
uint_t domi_nzones; |
4118 |
int error; |
|
0 | 4119 |
|
4120 |
if (copyin(numzones, &user_nzones, sizeof (uint_t)) != 0) |
|
4121 |
return (set_errno(EFAULT)); |
|
4122 |
||
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4123 |
myzone = curproc->p_zone; |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4124 |
if (myzone != global_zone) { |
1676 | 4125 |
bslabel_t *mybslab; |
4126 |
||
4127 |
if (!is_system_labeled()) { |
|
4128 |
/* just return current zone */ |
|
4129 |
real_nzones = domi_nzones = 1; |
|
4130 |
zoneids = kmem_alloc(sizeof (zoneid_t), KM_SLEEP); |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4131 |
zoneids[0] = myzone->zone_id; |
1676 | 4132 |
} else { |
4133 |
/* return all zones that are dominated */ |
|
4134 |
mutex_enter(&zonehash_lock); |
|
4135 |
real_nzones = zonecount; |
|
4136 |
domi_nzones = 0; |
|
4137 |
if (real_nzones > 0) { |
|
4138 |
zoneids = kmem_alloc(real_nzones * |
|
4139 |
sizeof (zoneid_t), KM_SLEEP); |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4140 |
mybslab = label2bslabel(myzone->zone_slabel); |
1676 | 4141 |
for (zone = list_head(&zone_active); |
4142 |
zone != NULL; |
|
4143 |
zone = list_next(&zone_active, zone)) { |
|
4144 |
if (zone->zone_id == GLOBAL_ZONEID) |
|
4145 |
continue; |
|
1769
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4146 |
if (zone != myzone && |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4147 |
(zone->zone_flags & ZF_IS_SCRATCH)) |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4148 |
continue; |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4149 |
/* |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4150 |
* Note that a label always dominates |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4151 |
* itself, so myzone is always included |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4152 |
* in the list. |
338500d67d4f
6404654 zoneadm mount command fails on labeled systems
carlsonj
parents:
1676
diff
changeset
|
4153 |
*/ |
1676 | 4154 |
if (bldominates(mybslab, |
4155 |
label2bslabel(zone->zone_slabel))) { |
|
4156 |
zoneids[domi_nzones++] = |
|
4157 |
zone->zone_id; |
|
4158 |
} |
|
4159 |
} |
|
4160 |
} |
|
4161 |
mutex_exit(&zonehash_lock); |
|
4162 |
} |
|
0 | 4163 |
} else { |
4164 |
mutex_enter(&zonehash_lock); |
|
4165 |
real_nzones = zonecount; |
|
1676 | 4166 |
domi_nzones = 0; |
4167 |
if (real_nzones > 0) { |
|
0 | 4168 |
zoneids = kmem_alloc(real_nzones * sizeof (zoneid_t), |
4169 |
KM_SLEEP); |
|
4170 |
for (zone = list_head(&zone_active); zone != NULL; |
|
4171 |
zone = list_next(&zone_active, zone)) |
|
1676 | 4172 |
zoneids[domi_nzones++] = zone->zone_id; |
4173 |
ASSERT(domi_nzones == real_nzones); |
|
0 | 4174 |
} |
4175 |
mutex_exit(&zonehash_lock); |
|
4176 |
} |
|
4177 |
||
1676 | 4178 |
/* |
4179 |
* If user has allocated space for fewer entries than we found, then |
|
4180 |
* return only up to his limit. Either way, tell him exactly how many |
|
4181 |
* we found. |
|
4182 |
*/ |
|
4183 |
if (domi_nzones < user_nzones) |
|
4184 |
user_nzones = domi_nzones; |
|
4185 |
error = 0; |
|
4186 |
if (copyout(&domi_nzones, numzones, sizeof (uint_t)) != 0) { |
|
0 | 4187 |
error = EFAULT; |
1676 | 4188 |
} else if (zoneidlist != NULL && user_nzones != 0) { |
0 | 4189 |
if (copyout(zoneids, zoneidlist, |
4190 |
user_nzones * sizeof (zoneid_t)) != 0) |
|
4191 |
error = EFAULT; |
|
4192 |
} |
|
4193 |
||
1676 | 4194 |
if (real_nzones > 0) |
0 | 4195 |
kmem_free(zoneids, real_nzones * sizeof (zoneid_t)); |
4196 |
||
1676 | 4197 |
if (error != 0) |
0 | 4198 |
return (set_errno(error)); |
4199 |
else |
|
4200 |
return (0); |
|
4201 |
} |
|
4202 |
||
4203 |
/* |
|
4204 |
* Systemcall entry point for zone_lookup(2). |
|
4205 |
* |
|
1676 | 4206 |
* Non-global zones are only able to see themselves and (on labeled systems) |
4207 |
* the zones they dominate. |
|
0 | 4208 |
*/ |
4209 |
static zoneid_t |
|
4210 |
zone_lookup(const char *zone_name) |
|
4211 |
{ |
|
4212 |
char *kname; |
|
4213 |
zone_t *zone; |
|
4214 |
zoneid_t zoneid; |
|
4215 |
int err; |
|
4216 |
||
4217 |
if (zone_name == NULL) { |
|
4218 |
/* return caller's zone id */ |
|
4219 |
return (getzoneid()); |
|
4220 |
} |
|
4221 |
||
4222 |
kname = kmem_zalloc(ZONENAME_MAX, KM_SLEEP); |
|
4223 |
if ((err = copyinstr(zone_name, kname, ZONENAME_MAX, NULL)) != 0) { |
|
4224 |
kmem_free(kname, ZONENAME_MAX); |
|
4225 |
return (set_errno(err)); |
|
4226 |
} |
|
4227 |
||
4228 |
mutex_enter(&zonehash_lock); |
|
4229 |
zone = zone_find_all_by_name(kname); |
|
4230 |
kmem_free(kname, ZONENAME_MAX); |
|
1676 | 4231 |
/* |
4232 |
* In a non-global zone, can only lookup global and own name. |
|
4233 |
* In Trusted Extensions zone label dominance rules apply. |
|
4234 |
*/ |
|
4235 |
if (zone == NULL || |
|
4236 |
zone_status_get(zone) < ZONE_IS_READY || |
|
4237 |
!zone_list_access(zone)) { |
|
0 | 4238 |
mutex_exit(&zonehash_lock); |
4239 |
return (set_errno(EINVAL)); |
|
1676 | 4240 |
} else { |
4241 |
zoneid = zone->zone_id; |
|
4242 |
mutex_exit(&zonehash_lock); |
|
4243 |
return (zoneid); |
|
0 | 4244 |
} |
4245 |
} |
|
4246 |
||
813 | 4247 |
static int |
4248 |
zone_version(int *version_arg) |
|
4249 |
{ |
|
4250 |
int version = ZONE_SYSCALL_API_VERSION; |
|
4251 |
||
4252 |
if (copyout(&version, version_arg, sizeof (int)) != 0) |
|
4253 |
return (set_errno(EFAULT)); |
|
4254 |
return (0); |
|
4255 |
} |
|
4256 |
||
0 | 4257 |
/*
 * zone(2) system call dispatcher: decodes 'cmd' and hands the remaining
 * arguments off to the matching zone_*() handler.  For ZONE_CREATE the
 * user-supplied zone_def must first be copied in, with a field-by-field
 * widening step when the caller is a 32-bit process (zone_def32).
 */
/* ARGSUSED */
long
zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
{
	zone_def zs;

	switch (cmd) {
	case ZONE_CREATE:
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyin(arg1, &zs, sizeof (zone_def))) {
				return (set_errno(EFAULT));
			}
		} else {
#ifdef _SYSCALL32_IMPL
			/*
			 * 32-bit caller: copy in the 32-bit layout and widen
			 * each pointer field into the native zone_def.
			 */
			zone_def32 zs32;

			if (copyin(arg1, &zs32, sizeof (zone_def32))) {
				return (set_errno(EFAULT));
			}
			zs.zone_name =
			    (const char *)(unsigned long)zs32.zone_name;
			zs.zone_root =
			    (const char *)(unsigned long)zs32.zone_root;
			zs.zone_privs =
			    (const struct priv_set *)
			    (unsigned long)zs32.zone_privs;
			zs.zone_privssz = zs32.zone_privssz;
			zs.rctlbuf = (caddr_t)(unsigned long)zs32.rctlbuf;
			zs.rctlbufsz = zs32.rctlbufsz;
			zs.zfsbuf = (caddr_t)(unsigned long)zs32.zfsbuf;
			zs.zfsbufsz = zs32.zfsbufsz;
			zs.extended_error =
			    (int *)(unsigned long)zs32.extended_error;
			zs.match = zs32.match;
			zs.doi = zs32.doi;
			zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
#else
			/* 32-bit data model without _SYSCALL32_IMPL: impossible */
			panic("get_udatamodel() returned bogus result\n");
#endif
		}

		return (zone_create(zs.zone_name, zs.zone_root,
		    zs.zone_privs, zs.zone_privssz,
		    (caddr_t)zs.rctlbuf, zs.rctlbufsz,
		    (caddr_t)zs.zfsbuf, zs.zfsbufsz,
		    zs.extended_error, zs.match, zs.doi,
		    zs.label));
	case ZONE_BOOT:
		return (zone_boot((zoneid_t)(uintptr_t)arg1));
	case ZONE_DESTROY:
		return (zone_destroy((zoneid_t)(uintptr_t)arg1));
	case ZONE_GETATTR:
		return (zone_getattr((zoneid_t)(uintptr_t)arg1,
		    (int)(uintptr_t)arg2, arg3, (size_t)arg4));
	case ZONE_SETATTR:
		return (zone_setattr((zoneid_t)(uintptr_t)arg1,
		    (int)(uintptr_t)arg2, arg3, (size_t)arg4));
	case ZONE_ENTER:
		return (zone_enter((zoneid_t)(uintptr_t)arg1));
	case ZONE_LIST:
		return (zone_list((zoneid_t *)arg1, (uint_t *)arg2));
	case ZONE_SHUTDOWN:
		return (zone_shutdown((zoneid_t)(uintptr_t)arg1));
	case ZONE_LOOKUP:
		return (zone_lookup((const char *)arg1));
	case ZONE_VERSION:
		return (zone_version((int *)arg1));
	default:
		return (set_errno(EINVAL));
	}
}
|
4328 |
||
4329 |
/*
 * Argument bundle handed from zone_kadmin() to the zone_ki_call_zoneadmd()
 * kernel thread; the thread copies out what it needs and frees it.
 */
struct zarg {
	zone_t *zone;		/* zone being halted/rebooted (held by caller) */
	zone_cmd_arg_t arg;	/* command to forward to zoneadmd */
};
|
4333 |
||
4334 |
static int |
|
4335 |
zone_lookup_door(const char *zone_name, door_handle_t *doorp) |
|
4336 |
{ |
|
4337 |
char *buf; |
|
4338 |
size_t buflen; |
|
4339 |
int error; |
|
4340 |
||
4341 |
buflen = sizeof (ZONE_DOOR_PATH) + strlen(zone_name); |
|
4342 |
buf = kmem_alloc(buflen, KM_SLEEP); |
|
4343 |
(void) snprintf(buf, buflen, ZONE_DOOR_PATH, zone_name); |
|
4344 |
error = door_ki_open(buf, doorp); |
|
4345 |
kmem_free(buf, buflen); |
|
4346 |
return (error); |
|
4347 |
} |
|
4348 |
||
4349 |
static void |
|
4350 |
zone_release_door(door_handle_t *doorp) |
|
4351 |
{ |
|
4352 |
door_ki_rele(*doorp); |
|
4353 |
*doorp = NULL; |
|
4354 |
} |
|
4355 |
||
4356 |
/*
 * Kernel thread (created by zone_kadmin()) that empties the zone and then
 * forwards the halt/reboot request to zoneadmd through its door.  It runs
 * as a child of p0 in the global zone, holds no zone reference across the
 * upcall, and retries until the request is delivered or the zone has
 * disappeared or been recycled (detected via the saved uniqid).
 */
static void
zone_ki_call_zoneadmd(struct zarg *zargp)
{
	door_handle_t door = NULL;
	door_arg_t darg, save_arg;
	char *zone_name;
	size_t zone_namelen;
	zoneid_t zoneid;
	zone_t *zone;
	zone_cmd_arg_t arg;
	uint64_t uniqid;
	size_t size;
	int error;
	int retry;

	/* Copy what we need out of zargp, then free it. */
	zone = zargp->zone;
	arg = zargp->arg;
	kmem_free(zargp, sizeof (*zargp));

	/* Stash name/id/uniqid before dropping our hold on the zone. */
	zone_namelen = strlen(zone->zone_name) + 1;
	zone_name = kmem_alloc(zone_namelen, KM_SLEEP);
	bcopy(zone->zone_name, zone_name, zone_namelen);
	zoneid = zone->zone_id;
	uniqid = zone->zone_uniqid;
	/*
	 * zoneadmd may be down, but at least we can empty out the zone.
	 * We can ignore the return value of zone_empty() since we're called
	 * from a kernel thread and know we won't be delivered any signals.
	 */
	ASSERT(curproc == &p0);
	(void) zone_empty(zone);
	ASSERT(zone_status_get(zone) >= ZONE_IS_EMPTY);
	zone_rele(zone);

	/* Door request and reply both use 'arg' as the buffer. */
	size = sizeof (arg);
	darg.rbuf = (char *)&arg;
	darg.data_ptr = (char *)&arg;
	darg.rsize = size;
	darg.data_size = size;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;

	save_arg = darg;
	/*
	 * Since we're not holding a reference to the zone, any number of
	 * things can go wrong, including the zone disappearing before we get a
	 * chance to talk to zoneadmd.
	 */
	for (retry = 0; /* forever */; retry++) {
		if (door == NULL &&
		    (error = zone_lookup_door(zone_name, &door)) != 0) {
			goto next;
		}
		ASSERT(door != NULL);

		if ((error = door_ki_upcall(door, &darg)) == 0) {
			break;
		}
		switch (error) {
		case EINTR:
			/* FALLTHROUGH */
		case EAGAIN:	/* process may be forking */
			/*
			 * Back off for a bit
			 */
			break;
		case EBADF:
			/* Door went stale; drop it and try to reopen. */
			zone_release_door(&door);
			if (zone_lookup_door(zone_name, &door) != 0) {
				/*
				 * zoneadmd may be dead, but it may come back to
				 * life later.
				 */
				break;
			}
			break;
		default:
			cmn_err(CE_WARN,
			    "zone_ki_call_zoneadmd: door_ki_upcall error %d\n",
			    error);
			goto out;
		}
	next:
		/*
		 * If this isn't the same zone_t that we originally had in mind,
		 * then this is the same as if two kadmin requests come in at
		 * the same time: the first one wins.  This means we lose, so we
		 * bail.
		 */
		if ((zone = zone_find_by_id(zoneid)) == NULL) {
			/*
			 * Problem is solved.
			 */
			break;
		}
		if (zone->zone_uniqid != uniqid) {
			/*
			 * zoneid recycled
			 */
			zone_rele(zone);
			break;
		}
		/*
		 * We could zone_status_timedwait(), but there doesn't seem to
		 * be much point in doing that (plus, it would mean that
		 * zone_free() isn't called until this thread exits).
		 */
		zone_rele(zone);
		delay(hz);
		/* restore the door args; the failed upcall may have changed them */
		darg = save_arg;
	}
out:
	if (door != NULL) {
		zone_release_door(&door);
	}
	kmem_free(zone_name, zone_namelen);
	thread_exit();
}
|
4474 |
||
4475 |
/*
 * Entry point for uadmin() to tell the zone to go away or reboot.  Analog to
 * kadmin().  The caller is a process in the zone.
 *
 * In order to shutdown the zone, we will hand off control to zoneadmd
 * (running in the global zone) via a door.  We do a half-hearted job at
 * killing all processes in the zone, create a kernel thread to contact
 * zoneadmd, and make note of the "uniqid" of the zone.  The uniqid is
 * a form of generation number used to let zoneadmd (as well as
 * zone_destroy()) know exactly which zone they're re talking about.
 */
int
zone_kadmin(int cmd, int fcn, const char *mdep, cred_t *credp)
{
	struct zarg *zargp;
	zone_cmd_t zcmd;
	zone_t *zone;

	zone = curproc->p_zone;
	ASSERT(getzoneid() != GLOBAL_ZONEID);

	/* Map the uadmin (cmd, fcn) pair onto a zone command. */
	switch (cmd) {
	case A_SHUTDOWN:
		switch (fcn) {
		case AD_HALT:
		case AD_POWEROFF:
			zcmd = Z_HALT;
			break;
		case AD_BOOT:
			zcmd = Z_REBOOT;
			break;
		case AD_IBOOT:
		case AD_SBOOT:
		case AD_SIBOOT:
		case AD_NOSYNC:
			return (ENOTSUP);
		default:
			return (EINVAL);
		}
		break;
	case A_REBOOT:
		zcmd = Z_REBOOT;
		break;
	case A_FTRACE:
	case A_REMOUNT:
	case A_FREEZE:
	case A_DUMP:
		return (ENOTSUP);
	default:
		ASSERT(cmd != A_SWAPCTL);	/* handled by uadmin() */
		return (EINVAL);
	}

	if (secpolicy_zone_admin(credp, B_FALSE))
		return (EPERM);
	mutex_enter(&zone_status_lock);

	/*
	 * zone_status can't be ZONE_IS_EMPTY or higher since curproc
	 * is in the zone.
	 */
	ASSERT(zone_status_get(zone) < ZONE_IS_EMPTY);
	if (zone_status_get(zone) > ZONE_IS_RUNNING) {
		/*
		 * This zone is already on its way down.
		 */
		mutex_exit(&zone_status_lock);
		return (0);
	}
	/*
	 * Prevent future zone_enter()s
	 */
	zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
	mutex_exit(&zone_status_lock);

	/*
	 * Kill everyone now and call zoneadmd later.
	 * zone_ki_call_zoneadmd() will do a more thorough job of this
	 * later.
	 */
	killall(zone->zone_id);
	/*
	 * Now, create the thread to contact zoneadmd and do the rest of the
	 * work.  This thread can't be created in our zone otherwise
	 * zone_destroy() would deadlock.
	 */
	zargp = kmem_zalloc(sizeof (*zargp), KM_SLEEP);
	zargp->arg.cmd = zcmd;
	zargp->arg.uniqid = zone->zone_uniqid;
	zargp->zone = zone;
	(void) strcpy(zargp->arg.locale, "C");
	/* mdep was already copied in for us by uadmin */
	if (mdep != NULL)
		(void) strlcpy(zargp->arg.bootbuf, mdep,
		    sizeof (zargp->arg.bootbuf));
	/* hold is dropped by zone_ki_call_zoneadmd() */
	zone_hold(zone);

	(void) thread_create(NULL, 0, zone_ki_call_zoneadmd, zargp, 0, &p0,
	    TS_RUN, minclsyspri);
	/* exit() is not expected to return; EINVAL below quiets the compiler */
	exit(CLD_EXITED, 0);

	return (EINVAL);
}
|
4578 |
||
4579 |
/*
 * Entry point so kadmin(A_SHUTDOWN, ...) can set the global zone's
 * status to ZONE_IS_SHUTTING_DOWN.
 *
 * May only be called from a process in the global zone, and only while
 * the global zone is still ZONE_IS_RUNNING (asserted below).
 */
void
zone_shutdown_global(void)
{
	ASSERT(curproc->p_zone == global_zone);

	mutex_enter(&zone_status_lock);
	ASSERT(zone_status_get(global_zone) == ZONE_IS_RUNNING);
	zone_status_set(global_zone, ZONE_IS_SHUTTING_DOWN);
	mutex_exit(&zone_status_lock);
}
|
789 | 4593 |
|
4594 |
/* |
|
4595 |
* Returns true if the named dataset is visible in the current zone. |
|
4596 |
* The 'write' parameter is set to 1 if the dataset is also writable. |
|
4597 |
*/ |
|
4598 |
int |
|
4599 |
zone_dataset_visible(const char *dataset, int *write) |
|
4600 |
{ |
|
4601 |
zone_dataset_t *zd; |
|
4602 |
size_t len; |
|
4603 |
zone_t *zone = curproc->p_zone; |
|
4604 |
||
4605 |
if (dataset[0] == '\0') |
|
4606 |
return (0); |
|
4607 |
||
4608 |
/* |
|
4609 |
* Walk the list once, looking for datasets which match exactly, or |
|
4610 |
* specify a dataset underneath an exported dataset. If found, return |
|
4611 |
* true and note that it is writable. |
|
4612 |
*/ |
|
4613 |
for (zd = list_head(&zone->zone_datasets); zd != NULL; |
|
4614 |
zd = list_next(&zone->zone_datasets, zd)) { |
|
4615 |
||
4616 |
len = strlen(zd->zd_dataset); |
|
4617 |
if (strlen(dataset) >= len && |
|
4618 |
bcmp(dataset, zd->zd_dataset, len) == 0 && |
|
816
4a2d51f7b961
6344201 Assertion failed: err == 0 (0x1 == 0x0), file: ../../common/fs/zfs/zfs_ctldir.c, line: 659
maybee
parents:
813
diff
changeset
|
4619 |
(dataset[len] == '\0' || dataset[len] == '/' || |
4a2d51f7b961
6344201 Assertion failed: err == 0 (0x1 == 0x0), file: ../../common/fs/zfs/zfs_ctldir.c, line: 659
maybee
parents:
813
diff
changeset
|
4620 |
dataset[len] == '@')) { |
789 | 4621 |
if (write) |
4622 |
*write = 1; |
|
4623 |
return (1); |
|
4624 |
} |
|
4625 |
} |
|
4626 |
||
4627 |
/* |
|
4628 |
* Walk the list a second time, searching for datasets which are parents |
|
4629 |
* of exported datasets. These should be visible, but read-only. |
|
4630 |
* |
|
4631 |
* Note that we also have to support forms such as 'pool/dataset/', with |
|
4632 |
* a trailing slash. |
|
4633 |
*/ |
|
4634 |
for (zd = list_head(&zone->zone_datasets); zd != NULL; |
|
4635 |
zd = list_next(&zone->zone_datasets, zd)) { |
|
4636 |
||
4637 |
len = strlen(dataset); |
|
4638 |
if (dataset[len - 1] == '/') |
|
4639 |
len--; /* Ignore trailing slash */ |
|
4640 |
if (len < strlen(zd->zd_dataset) && |
|
4641 |
bcmp(dataset, zd->zd_dataset, len) == 0 && |
|
4642 |
zd->zd_dataset[len] == '/') { |
|
4643 |
if (write) |
|
4644 |
*write = 0; |
|
4645 |
return (1); |
|
4646 |
} |
|
4647 |
} |
|
4648 |
||
4649 |
return (0); |
|
4650 |
} |
|
1676 | 4651 |
|
4652 |
/*
 * zone_find_by_any_path() -
 *
 * kernel-private routine similar to zone_find_by_path(), but which
 * effectively compares against zone paths rather than zonerootpath
 * (i.e., the last component of zonerootpaths, which should be "root/",
 * are not compared.)  This is done in order to accurately identify all
 * paths, whether zone-visible or not, including those which are parallel
 * to /root/, such as /dev/, /home/, etc...
 *
 * If the specified path does not fall under any zone path then global
 * zone is returned.
 *
 * The treat_abs parameter indicates whether the path should be treated as
 * an absolute path although it does not begin with "/".  (This supports
 * nfs mount syntax such as host:any/path.)
 *
 * The caller is responsible for zone_rele of the returned zone.
 */
zone_t *
zone_find_by_any_path(const char *path, boolean_t treat_abs)
{
	zone_t *zone;
	int path_offset = 0;

	if (path == NULL) {
		/* no path at all: attribute it to the global zone */
		zone_hold(global_zone);
		return (global_zone);
	}

	if (*path != '/') {
		ASSERT(treat_abs);
		/* skip each rootpath's leading '/' when comparing below */
		path_offset = 1;
	}

	mutex_enter(&zonehash_lock);
	for (zone = list_head(&zone_active); zone != NULL;
	    zone = list_next(&zone_active, zone)) {
		char *c;
		size_t pathlen;
		char *rootpath_start;

		if (zone == global_zone)	/* skip global zone */
			continue;

		/* scan backwards to find start of last component */
		c = zone->zone_rootpath + zone->zone_rootpathlen - 2;
		do {
			c--;
		} while (*c != '/');

		/*
		 * NOTE(review): kept as separate statements rather than a
		 * single expression -- per the change history this shape
		 * works around a compiler code-generation problem
		 * (bug 6414797); do not re-merge.
		 */
		pathlen = c - zone->zone_rootpath + 1 - path_offset;
		rootpath_start = (zone->zone_rootpath + path_offset);
		if (strncmp(path, rootpath_start, pathlen) == 0)
			break;
	}
	/* no match: fall back to the global zone */
	if (zone == NULL)
		zone = global_zone;
	zone_hold(zone);
	mutex_exit(&zonehash_lock);
	return (zone);
}