--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/brand/lx/lx_brand/common/sched.c Mon Sep 11 22:51:59 2006 -0700
@@ -0,0 +1,610 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/cred_impl.h>
+#include <sys/ucred.h>
+#include <ucred.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sched.h>
+#include <strings.h>
+#include <pthread.h>
+#include <time.h>
+#include <thread.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_sched.h>
+
+/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
+static int
+validate_policy(int policy)
+{
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ return (SCHED_FIFO);
+
+ case LX_SCHED_RR:
+ return (SCHED_RR);
+
+ case LX_SCHED_OTHER:
+ return (SCHED_OTHER);
+
+ default:
+ lx_debug("validate_policy: illegal policy: %d", policy);
+ return (-EINVAL);
+ }
+}
+
+/*
+ * Check to see if we have the permissions to set scheduler parameters and
+ * policy, based on Linux' demand that such commands fail with errno set to
+ * EPERM if the current euid is not the euid or ruid of the process in
+ * question.
+ */
+static int
+check_schedperms(pid_t pid)
+{
+ size_t sz;
+ ucred_t *cr;
+ uid_t euid;
+
+ euid = geteuid();
+
+ if (pid == getpid()) {
+ /*
+ * If we're the process to be checked, simply check the euid
+ * against our ruid.
+ */
+ if (euid != getuid())
+ return (-EPERM);
+
+ return (0);
+ }
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ /*
+ * If we can't access the process' credentials, fail with errno EPERM
+ * as the call would not have succeeded anyway.
+ */
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
+ return ((errno == EACCES) ? -EPERM : -errno);
+
+ if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
+ return (-EPERM);
+
+ return (0);
+}
+
+static int
+ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
+ return (-errno);
+
+ bzero(sp, sizeof (struct sched_param));
+
+ /*
+ * Linux has a fixed priority range, 0 - 99, which we need to convert to
+ * Solaris's dynamic range. Linux considers lower numbers to be
+ * higher priority, so we'll invert the priority within Solaris's range.
+ *
+ * The formula to convert between ranges is:
+ *
+ * L * (smax - smin)
+ * S = ----------------- + smin
+ * (lmax - lmin)
+ *
+ * where S is the Solaris equivalent of the linux priority L.
+ *
+ * To invert the priority, we use:
+ * S' = smax - S + smin
+ *
+ * Together, these two formulas become:
+ *
+ * L * (smax - smin)
+ * S = smax - ----------------- + 2smin
+ * 99
+ */
+ sp->sched_priority = smax -
+ ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
+
+ lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
+ "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
+ smin, smax);
+
+ return (0);
+}
+
+static int
+stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (policy == SCHED_OTHER) {
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ ls.lx_sched_prio = 0;
+ } else {
+ /*
+ * Convert Solaris's dynamic, inverted priority range to the
+ * fixed Linux range of 1 - 99.
+ *
+ * The formula is (see above):
+ *
+ * (smax - s + 2smin) * 99
+ * l = -----------------------
+ * smax - smin
+ */
+ ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
+ LX_PRI_MAX) / (smax - smin);
+ }
+
+ lx_debug("stol_sparam: Solaris prio %d = linux prio %d "
+ "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio,
+ smin, smax);
+
+ return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
+ ? -errno : 0);
+}
+
+#define BITINDEX(ind) (ind / (sizeof (ulong_t) * 8))
+#define BITSHIFT(ind) (1 << (ind % (sizeof (ulong_t) * 8)))
+
+/* ARGSUSED */
+int
+lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int sz;
+ ulong_t *lmask, *zmask;
+ int i;
+
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
+ if (sz == -1)
+ return (-errno);
+
+ /*
+ * If the target LWP hasn't ever had an affinity mask set, the kernel
+ * will return a mask of all 0's. If that is the case we must build a
+ * default mask that has all valid bits turned on.
+ */
+ lmask = SAFE_ALLOCA(sz);
+ zmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL || zmask == NULL)
+ return (-ENOMEM);
+
+ bzero(zmask, sz);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ if (bcmp(lmask, zmask, sz) != 0)
+ return (sz);
+
+ for (i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) != -1) {
+ lmask[BITINDEX(i)] |= BITSHIFT(i);
+ }
+ }
+
+ if (uucopy(lmask, (void *)maskp, sz) != 0)
+ return (-EFAULT);
+
+ return (sz);
+}
+
+/* ARGSUSED */
+int
+lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int ret;
+ int sz;
+ int i;
+ int found;
+ ulong_t *lmask;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ processorid_t cpuid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * We only support setting affinity masks for threads in
+ * the calling process.
+ */
+ if (s_pid != getpid())
+ return (-EPERM);
+
+ /*
+ * First, get the minimum bitmask size from the kernel.
+ */
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
+ if (sz == -1)
+ return (-errno);
+
+ lmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL)
+ return (-ENOMEM);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ /*
+ * Make sure the mask contains at least one processor that is
+ * physically on the system. Reduce the user's mask to the set of
+ * physically present CPUs. Keep track of how many valid
+ * bits are set in the user's mask.
+ */
+
+ for (found = 0, i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) == -1) {
+ /*
+ * This CPU doesn't exist, so clear this bit from
+ * the user's mask.
+ */
+ lmask[BITINDEX(i)] &= ~BITSHIFT(i);
+ continue;
+ }
+
+ if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
+ found++;
+ cpuid = i;
+ }
+ }
+
+ if (found == 0) {
+ lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
+ return (-EINVAL);
+ }
+
+ /*
+ * If only one bit is set, bind the thread to that procesor;
+ * otherwise, clear the binding.
+ */
+ if (found == 1) {
+ lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
+ s_tid, cpuid);
+ if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0)
+ /*
+ * It could be that the requested processor is offline,
+ * so we'll just abandon our good-natured attempt to
+ * bind to it.
+ */
+ lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
+ cpuid, strerror(errno));
+ } else {
+ lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
+ s_tid);
+ if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
+ lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
+ s_tid, strerror(errno));
+ }
+ }
+
+ /*
+ * Finally, ask the kernel to make a note of our current (though fairly
+ * meaningless) affinity mask.
+ */
+ ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
+
+ return ((ret == 0) ? 0 : -errno);
+}
+
+int
+lx_sched_getparam(uintptr_t pid, uintptr_t param)
+{
+ int policy, ret;
+ pid_t s_pid;
+ lwpid_t s_tid;
+
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * If we're attempting to get information on our own process, we can
+ * get data on a per-thread basis; if not, punt and use the specified
+ * pid.
+ */
+ if (s_pid == getpid()) {
+ if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
+ return (-ret);
+ } else {
+ if (sched_getparam(s_pid, &sp) == -1)
+ return (-errno);
+
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+ }
+
+ return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
+}
+
+int
+lx_sched_setparam(uintptr_t pid, uintptr_t param)
+{
+ int err, policy;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ struct lx_sched_param lp;
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ if (s_pid == getpid()) {
+ struct sched_param dummy;
+
+ if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
+ return (-err);
+ } else
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+
+ lx_debug("sched_setparam(): current policy %d", policy);
+
+ if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+ return (-errno);
+
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
+ return (-EINVAL);
+
+ if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
+ &sp)) != 0)
+ return (err);
+
+ /*
+ * Check if we're allowed to change the scheduler for the process.
+ *
+ * If we're operating on a thread, we can't just call
+ * pthread_setschedparam() because as all threads reside within a
+ * single Solaris process, Solaris will allow the modification
+ *
+ * If we're operating on a process, we can't just call sched_setparam()
+ * because Solaris will allow the call to succeed if the scheduler
+ * parameters do not differ from those being installed, but Linux wants
+ * the call to fail.
+ */
+ if ((err = check_schedperms(s_pid)) != 0)
+ return (err);
+
+ if (s_pid == getpid())
+ return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
+ ? -err : 0);
+
+ return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
+}
+
+int
+lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec)
+{
+ struct timespec ts;
+ pid_t s_pid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
+ return (-ESRCH);
+
+ if (uucopy((struct timespec *)timespec, &ts,
+ sizeof (struct timespec)) != 0)
+ return (-errno);
+
+ return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0);
+}
+
+int
+lx_sched_getscheduler(uintptr_t pid)
+{
+ int policy, rv;
+ pid_t s_pid;
+ lwpid_t s_tid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ if (s_pid == getpid()) {
+ struct sched_param dummy;
+
+ if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
+ return (-rv);
+ } else
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+
+ /*
+ * Linux only supports certain policies; avoid confusing apps with
+ * alien policies.
+ */
+ switch (policy) {
+ case SCHED_FIFO:
+ return (LX_SCHED_FIFO);
+ case SCHED_OTHER:
+ return (LX_SCHED_OTHER);
+ case SCHED_RR:
+ return (LX_SCHED_RR);
+ default:
+ break;
+ }
+
+ return (LX_SCHED_OTHER);
+}
+
+int
+lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
+{
+ int rt_pol;
+ int rv;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ struct lx_sched_param lp;
+
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if ((rt_pol = validate_policy((int)policy)) < 0)
+ return (rt_pol);
+
+ if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
+ &sp)) != 0)
+ return (rv);
+
+ if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+ return (-errno);
+
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * Check if we're allowed to change the scheduler for the process.
+ *
+ * If we're operating on a thread, we can't just call
+ * pthread_setschedparam() because as all threads reside within a
+ * single Solaris process, Solaris will allow the modification.
+ *
+ * If we're operating on a process, we can't just call
+ * sched_setscheduler() because Solaris will allow the call to succeed
+ * if the scheduler and scheduler parameters do not differ from those
+ * being installed, but Linux wants the call to fail.
+ */
+ if ((rv = check_schedperms(s_pid)) != 0)
+ return (rv);
+
+ if (s_pid == getpid()) {
+ struct sched_param param;
+ int pol;
+
+ if ((pol = sched_getscheduler(s_pid)) != 0)
+ return (-errno);
+
+ /*
+ * sched_setscheduler() returns the previous scheduling policy
+ * on success, so call pthread_getschedparam() to get the
+ * current thread's scheduling policy and return that if the
+ * call to pthread_setschedparam() succeeds.
+ */
+ if ((rv = pthread_getschedparam(s_tid, &pol, ¶m)) != 0)
+ return (-rv);
+
+ return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
+ ? -rv : pol);
+ }
+
+ return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
+ ? -errno : rv);
+}
+
+int
+lx_sched_get_priority_min(uintptr_t policy)
+{
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0.
+ * Linux scheduling priorities are not alterable, so there is no
+ * Solaris translation necessary.
+ */
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ return (LX_SCHED_PRIORITY_MIN_RRFIFO);
+ case LX_SCHED_OTHER:
+ return (LX_SCHED_PRIORITY_MIN_OTHER);
+ default:
+ break;
+ }
+ return (-EINVAL);
+}
+
+int
+lx_sched_get_priority_max(uintptr_t policy)
+{
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ * Linux scheduling priorities are not alterable, so there is no
+ * Solaris translation necessary.
+ */
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ return (LX_SCHED_PRIORITY_MAX_RRFIFO);
+ case LX_SCHED_OTHER:
+ return (LX_SCHED_PRIORITY_MAX_OTHER);
+ default:
+ break;
+ }
+ return (-EINVAL);
+}