--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/perl/Kstat.xs Mon Apr 19 14:25:41 2010 -0400
@@ -0,0 +1,1715 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Kstat.xs is a Perl XS (eXStension module) that makes the Solaris
+ * kstat(3KSTAT) facility available to Perl scripts. Kstat is a general-purpose
+ * mechanism for providing kernel statistics to users. The Solaris API is
+ * function-based (see the manpage for details), but for ease of use in Perl
+ * scripts this module presents the information as a nested hash data structure.
+ * It would be too inefficient to read every kstat in the system, so this module
+ * uses the Perl TIEHASH mechanism to implement a read-on-demand semantic, which
+ * only reads and updates kstats as and when they are actually accessed.
+ */
+
+/*
+ * Ignored raw kstats.
+ *
+ * Some raw kstats are ignored by this module, these are listed below. The
+ * most common reason is that the kstats are stored as arrays and the ks_ndata
+ * and/or ks_data_size fields are invalid. In this case it is impossible to
+ * know how many records are in the array, so they can't be read.
+ *
+ * unix:*:sfmmu_percpu_stat
+ * This is stored as an array with one entry per cpu. Each element is of type
+ * struct sfmmu_percpu_stat. The ks_ndata and ks_data_size fields are bogus.
+ *
+ * ufs directio:*:UFS DirectIO Stats
+ * The structure definition used for these kstats (ufs_directio_kstats) is in a
+ * C file (uts/common/fs/ufs/ufs_directio.c) rather than a header file, so it
+ * isn't accessible.
+ *
+ * qlc:*:statistics
+ * This is a third-party driver for which we don't have source.
+ *
+ * mm:*:phys_installed
+ * This is stored as an array of uint64_t, with each pair of values being the
+ * (address, size) of a memory segment. The ks_ndata and ks_data_size fields
+ * are both zero.
+ *
+ * sockfs:*:sock_unix_list
+ * This is stored as an array with one entry per active socket. Each element
+ * is of type struct k_sockinfo. The ks_ndata and ks_data_size fields are both
+ * zero.
+ *
+ * Note that the ks_ndata and ks_data_size of many non-array raw kstats are
+ * also incorrect. The relevant assertions are therefore commented out in the
+ * appropriate raw kstat read routines.
+ */
+
+/* Kstat related includes */
+#include <libgen.h>
+#include <kstat.h>
+#include <sys/var.h>
+#include <sys/utsname.h>
+#include <sys/sysinfo.h>
+#include <sys/flock.h>
+#include <sys/dnlc.h>
+#include <sys/vmmeter.h>
+#include <nfs/nfs.h>
+#include <nfs/nfs_clnt.h>
+
+/* Ultra-specific kstat includes */
+#ifdef __sparc
+#include <vm/hat_sfmmu.h> /* from /usr/platform/sun4u/include */
+#include <sys/simmstat.h> /* from /usr/platform/sun4u/include */
+#include <sys/sysctrl.h> /* from /usr/platform/sun4u/include */
+#include <sys/fhc.h> /* from /usr/include */
+#endif
+
+/*
+ * Solaris #defines SP, which conflicts with the perl definition of SP
+ * We don't need the Solaris one, so get rid of it to avoid warnings
+ */
+#undef SP
+
+/* Perl XS includes */
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+/* Debug macros */
+#define DEBUG_ID "Sun::Solaris::Kstat"
+#ifdef KSTAT_DEBUG
+#define PERL_ASSERT(EXP) \
+ ((void)((EXP) || (croak("%s: assertion failed at %s:%d: %s", \
+ DEBUG_ID, __FILE__, __LINE__, #EXP), 0), 0))
+#define PERL_ASSERTMSG(EXP, MSG) \
+ ((void)((EXP) || (croak(DEBUG_ID ": " MSG), 0), 0))
+#else
+#define PERL_ASSERT(EXP) ((void)0)
+#define PERL_ASSERTMSG(EXP, MSG) ((void)0)
+#endif
+
+/* Macros for saving the contents of KSTAT_RAW structures */
+#if defined(HAS_QUAD) && defined(USE_64_BIT_INT)
+#define NEW_IV(V) \
+ (newSViv((IVTYPE) V))
+#define NEW_UV(V) \
+ (newSVuv((UVTYPE) V))
+#else
+#define NEW_IV(V) \
+ (V >= IV_MIN && V <= IV_MAX ? newSViv((IVTYPE) V) : newSVnv((NVTYPE) V))
+#if defined(UVTYPE)
+#define NEW_UV(V) \
+ (V <= UV_MAX ? newSVuv((UVTYPE) V) : newSVnv((NVTYPE) V))
+# else
+#define NEW_UV(V) \
+ (V <= IV_MAX ? newSViv((IVTYPE) V) : newSVnv((NVTYPE) V))
+#endif
+#endif
+#define NEW_HRTIME(V) \
+ newSVnv((NVTYPE) (V / 1000000000.0))
+
+#define SAVE_FNP(H, F, K) \
+ hv_store(H, K, sizeof (K) - 1, newSViv((IVTYPE) &F), 0)
+#define SAVE_STRING(H, S, K, SS) \
+ hv_store(H, #K, sizeof (#K) - 1, \
+ newSVpvn(S->K, SS ? strlen(S->K) : sizeof(S->K)), 0)
+#define SAVE_INT32(H, S, K) \
+ hv_store(H, #K, sizeof (#K) - 1, NEW_IV(S->K), 0)
+#define SAVE_UINT32(H, S, K) \
+ hv_store(H, #K, sizeof (#K) - 1, NEW_UV(S->K), 0)
+#define SAVE_INT64(H, S, K) \
+ hv_store(H, #K, sizeof (#K) - 1, NEW_IV(S->K), 0)
+#define SAVE_UINT64(H, S, K) \
+ hv_store(H, #K, sizeof (#K) - 1, NEW_UV(S->K), 0)
+#define SAVE_HRTIME(H, S, K) \
+ hv_store(H, #K, sizeof (#K) - 1, NEW_HRTIME(S->K), 0)
+
+/* Private structure used for saving kstat info in the tied hashes */
+typedef struct {
+ char read; /* Kstat block has been read before */
+ char valid; /* Kstat still exists in kstat chain */
+ char strip_str; /* Strip KSTAT_DATA_CHAR fields */
+ kstat_ctl_t *kstat_ctl; /* Handle returned by kstat_open */
+ kstat_t *kstat; /* Handle used by kstat_read */
+} KstatInfo_t;
+
+/* typedef for apply_to_ties callback functions */
+typedef int (*ATTCb_t)(HV *, void *);
+
+/* typedef for raw kstat reader functions */
+typedef void (*kstat_raw_reader_t)(HV *, kstat_t *, int);
+
+/* Hash of "module:name" to KSTAT_RAW read functions */
+static HV *raw_kstat_lookup;
+
+/*
+ * Kstats come in two flavours, named and raw. Raw kstats are just C structs,
+ * so we need a function per raw kstat to convert the C struct into the
+ * corresponding perl hash. All such conversion functions are in the following
+ * section.
+ */
+
+/*
+ * Definitions in /usr/include/sys/cpuvar.h and /usr/include/sys/sysinfo.h
+ */
+
+static void
+save_cpu_stat(HV *self, kstat_t *kp, int strip_str)
+{
+ cpu_stat_t *statp;
+ cpu_sysinfo_t *sysinfop;
+ cpu_syswait_t *syswaitp;
+ cpu_vminfo_t *vminfop;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (cpu_stat_t));
+ statp = (cpu_stat_t *)(kp->ks_data);
+ sysinfop = &statp->cpu_sysinfo;
+ syswaitp = &statp->cpu_syswait;
+ vminfop = &statp->cpu_vminfo;
+
+ hv_store(self, "idle", 4, NEW_UV(sysinfop->cpu[CPU_IDLE]), 0);
+ hv_store(self, "user", 4, NEW_UV(sysinfop->cpu[CPU_USER]), 0);
+ hv_store(self, "kernel", 6, NEW_UV(sysinfop->cpu[CPU_KERNEL]), 0);
+ hv_store(self, "wait", 4, NEW_UV(sysinfop->cpu[CPU_WAIT]), 0);
+ hv_store(self, "wait_io", 7, NEW_UV(sysinfop->wait[W_IO]), 0);
+ hv_store(self, "wait_swap", 9, NEW_UV(sysinfop->wait[W_SWAP]), 0);
+ hv_store(self, "wait_pio", 8, NEW_UV(sysinfop->wait[W_PIO]), 0);
+ SAVE_UINT32(self, sysinfop, bread);
+ SAVE_UINT32(self, sysinfop, bwrite);
+ SAVE_UINT32(self, sysinfop, lread);
+ SAVE_UINT32(self, sysinfop, lwrite);
+ SAVE_UINT32(self, sysinfop, phread);
+ SAVE_UINT32(self, sysinfop, phwrite);
+ SAVE_UINT32(self, sysinfop, pswitch);
+ SAVE_UINT32(self, sysinfop, trap);
+ SAVE_UINT32(self, sysinfop, intr);
+ SAVE_UINT32(self, sysinfop, syscall);
+ SAVE_UINT32(self, sysinfop, sysread);
+ SAVE_UINT32(self, sysinfop, syswrite);
+ SAVE_UINT32(self, sysinfop, sysfork);
+ SAVE_UINT32(self, sysinfop, sysvfork);
+ SAVE_UINT32(self, sysinfop, sysexec);
+ SAVE_UINT32(self, sysinfop, readch);
+ SAVE_UINT32(self, sysinfop, writech);
+ SAVE_UINT32(self, sysinfop, rcvint);
+ SAVE_UINT32(self, sysinfop, xmtint);
+ SAVE_UINT32(self, sysinfop, mdmint);
+ SAVE_UINT32(self, sysinfop, rawch);
+ SAVE_UINT32(self, sysinfop, canch);
+ SAVE_UINT32(self, sysinfop, outch);
+ SAVE_UINT32(self, sysinfop, msg);
+ SAVE_UINT32(self, sysinfop, sema);
+ SAVE_UINT32(self, sysinfop, namei);
+ SAVE_UINT32(self, sysinfop, ufsiget);
+ SAVE_UINT32(self, sysinfop, ufsdirblk);
+ SAVE_UINT32(self, sysinfop, ufsipage);
+ SAVE_UINT32(self, sysinfop, ufsinopage);
+ SAVE_UINT32(self, sysinfop, inodeovf);
+ SAVE_UINT32(self, sysinfop, fileovf);
+ SAVE_UINT32(self, sysinfop, procovf);
+ SAVE_UINT32(self, sysinfop, intrthread);
+ SAVE_UINT32(self, sysinfop, intrblk);
+ SAVE_UINT32(self, sysinfop, idlethread);
+ SAVE_UINT32(self, sysinfop, inv_swtch);
+ SAVE_UINT32(self, sysinfop, nthreads);
+ SAVE_UINT32(self, sysinfop, cpumigrate);
+ SAVE_UINT32(self, sysinfop, xcalls);
+ SAVE_UINT32(self, sysinfop, mutex_adenters);
+ SAVE_UINT32(self, sysinfop, rw_rdfails);
+ SAVE_UINT32(self, sysinfop, rw_wrfails);
+ SAVE_UINT32(self, sysinfop, modload);
+ SAVE_UINT32(self, sysinfop, modunload);
+ SAVE_UINT32(self, sysinfop, bawrite);
+#ifdef STATISTICS /* see header file */
+ SAVE_UINT32(self, sysinfop, rw_enters);
+ SAVE_UINT32(self, sysinfop, win_uo_cnt);
+ SAVE_UINT32(self, sysinfop, win_uu_cnt);
+ SAVE_UINT32(self, sysinfop, win_so_cnt);
+ SAVE_UINT32(self, sysinfop, win_su_cnt);
+ SAVE_UINT32(self, sysinfop, win_suo_cnt);
+#endif
+
+ SAVE_INT32(self, syswaitp, iowait);
+ SAVE_INT32(self, syswaitp, swap);
+ SAVE_INT32(self, syswaitp, physio);
+
+ SAVE_UINT32(self, vminfop, pgrec);
+ SAVE_UINT32(self, vminfop, pgfrec);
+ SAVE_UINT32(self, vminfop, pgin);
+ SAVE_UINT32(self, vminfop, pgpgin);
+ SAVE_UINT32(self, vminfop, pgout);
+ SAVE_UINT32(self, vminfop, pgpgout);
+ SAVE_UINT32(self, vminfop, swapin);
+ SAVE_UINT32(self, vminfop, pgswapin);
+ SAVE_UINT32(self, vminfop, swapout);
+ SAVE_UINT32(self, vminfop, pgswapout);
+ SAVE_UINT32(self, vminfop, zfod);
+ SAVE_UINT32(self, vminfop, dfree);
+ SAVE_UINT32(self, vminfop, scan);
+ SAVE_UINT32(self, vminfop, rev);
+ SAVE_UINT32(self, vminfop, hat_fault);
+ SAVE_UINT32(self, vminfop, as_fault);
+ SAVE_UINT32(self, vminfop, maj_fault);
+ SAVE_UINT32(self, vminfop, cow_fault);
+ SAVE_UINT32(self, vminfop, prot_fault);
+ SAVE_UINT32(self, vminfop, softlock);
+ SAVE_UINT32(self, vminfop, kernel_asflt);
+ SAVE_UINT32(self, vminfop, pgrrun);
+ SAVE_UINT32(self, vminfop, execpgin);
+ SAVE_UINT32(self, vminfop, execpgout);
+ SAVE_UINT32(self, vminfop, execfree);
+ SAVE_UINT32(self, vminfop, anonpgin);
+ SAVE_UINT32(self, vminfop, anonpgout);
+ SAVE_UINT32(self, vminfop, anonfree);
+ SAVE_UINT32(self, vminfop, fspgin);
+ SAVE_UINT32(self, vminfop, fspgout);
+ SAVE_UINT32(self, vminfop, fsfree);
+}
+
+/*
+ * Definitions in /usr/include/sys/var.h
+ */
+
+static void
+save_var(HV *self, kstat_t *kp, int strip_str)
+{
+ struct var *varp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct var));
+ varp = (struct var *)(kp->ks_data);
+
+ SAVE_INT32(self, varp, v_buf);
+ SAVE_INT32(self, varp, v_call);
+ SAVE_INT32(self, varp, v_proc);
+ SAVE_INT32(self, varp, v_maxupttl);
+ SAVE_INT32(self, varp, v_nglobpris);
+ SAVE_INT32(self, varp, v_maxsyspri);
+ SAVE_INT32(self, varp, v_clist);
+ SAVE_INT32(self, varp, v_maxup);
+ SAVE_INT32(self, varp, v_hbuf);
+ SAVE_INT32(self, varp, v_hmask);
+ SAVE_INT32(self, varp, v_pbuf);
+ SAVE_INT32(self, varp, v_sptmap);
+ SAVE_INT32(self, varp, v_maxpmem);
+ SAVE_INT32(self, varp, v_autoup);
+ SAVE_INT32(self, varp, v_bufhwm);
+}
+
+/*
+ * Definition in /usr/include/sys/vmmeter.h
+ */
+
+static void
+save_flushmeter(HV *self, kstat_t *kp, int strip_str)
+{
+ struct flushmeter *flushmeterp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct flushmeter));
+ flushmeterp = (struct flushmeter *)(kp->ks_data);
+
+ SAVE_UINT32(self, flushmeterp, f_ctx);
+ SAVE_UINT32(self, flushmeterp, f_segment);
+ SAVE_UINT32(self, flushmeterp, f_page);
+ SAVE_UINT32(self, flushmeterp, f_partial);
+ SAVE_UINT32(self, flushmeterp, f_usr);
+ SAVE_UINT32(self, flushmeterp, f_region);
+}
+
+/*
+ * Definition in /usr/include/sys/dnlc.h
+ */
+
+static void
+save_ncstats(HV *self, kstat_t *kp, int strip_str)
+{
+ struct ncstats *ncstatsp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct ncstats));
+ ncstatsp = (struct ncstats *)(kp->ks_data);
+
+ SAVE_INT32(self, ncstatsp, hits);
+ SAVE_INT32(self, ncstatsp, misses);
+ SAVE_INT32(self, ncstatsp, enters);
+ SAVE_INT32(self, ncstatsp, dbl_enters);
+ SAVE_INT32(self, ncstatsp, long_enter);
+ SAVE_INT32(self, ncstatsp, long_look);
+ SAVE_INT32(self, ncstatsp, move_to_front);
+ SAVE_INT32(self, ncstatsp, purges);
+}
+
+/*
+ * Definition in /usr/include/sys/sysinfo.h
+ */
+
+static void
+save_sysinfo(HV *self, kstat_t *kp, int strip_str)
+{
+ sysinfo_t *sysinfop;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (sysinfo_t));
+ sysinfop = (sysinfo_t *)(kp->ks_data);
+
+ SAVE_UINT32(self, sysinfop, updates);
+ SAVE_UINT32(self, sysinfop, runque);
+ SAVE_UINT32(self, sysinfop, runocc);
+ SAVE_UINT32(self, sysinfop, swpque);
+ SAVE_UINT32(self, sysinfop, swpocc);
+ SAVE_UINT32(self, sysinfop, waiting);
+}
+
+/*
+ * Definition in /usr/include/sys/sysinfo.h
+ */
+
+static void
+save_vminfo(HV *self, kstat_t *kp, int strip_str)
+{
+ vminfo_t *vminfop;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (vminfo_t));
+ vminfop = (vminfo_t *)(kp->ks_data);
+
+ SAVE_UINT64(self, vminfop, freemem);
+ SAVE_UINT64(self, vminfop, swap_resv);
+ SAVE_UINT64(self, vminfop, swap_alloc);
+ SAVE_UINT64(self, vminfop, swap_avail);
+ SAVE_UINT64(self, vminfop, swap_free);
+}
+
+/*
+ * Definition in /usr/include/nfs/nfs_clnt.h
+ */
+
+static void
+save_nfs(HV *self, kstat_t *kp, int strip_str)
+{
+ struct mntinfo_kstat *mntinfop;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct mntinfo_kstat));
+ mntinfop = (struct mntinfo_kstat *)(kp->ks_data);
+
+ SAVE_STRING(self, mntinfop, mik_proto, strip_str);
+ SAVE_UINT32(self, mntinfop, mik_vers);
+ SAVE_UINT32(self, mntinfop, mik_flags);
+ SAVE_UINT32(self, mntinfop, mik_secmod);
+ SAVE_UINT32(self, mntinfop, mik_curread);
+ SAVE_UINT32(self, mntinfop, mik_curwrite);
+ SAVE_INT32(self, mntinfop, mik_timeo);
+ SAVE_INT32(self, mntinfop, mik_retrans);
+ SAVE_UINT32(self, mntinfop, mik_acregmin);
+ SAVE_UINT32(self, mntinfop, mik_acregmax);
+ SAVE_UINT32(self, mntinfop, mik_acdirmin);
+ SAVE_UINT32(self, mntinfop, mik_acdirmax);
+ hv_store(self, "lookup_srtt", 11,
+ NEW_UV(mntinfop->mik_timers[0].srtt), 0);
+ hv_store(self, "lookup_deviate", 14,
+ NEW_UV(mntinfop->mik_timers[0].deviate), 0);
+ hv_store(self, "lookup_rtxcur", 13,
+ NEW_UV(mntinfop->mik_timers[0].rtxcur), 0);
+ hv_store(self, "read_srtt", 9,
+ NEW_UV(mntinfop->mik_timers[1].srtt), 0);
+ hv_store(self, "read_deviate", 12,
+ NEW_UV(mntinfop->mik_timers[1].deviate), 0);
+ hv_store(self, "read_rtxcur", 11,
+ NEW_UV(mntinfop->mik_timers[1].rtxcur), 0);
+ hv_store(self, "write_srtt", 10,
+ NEW_UV(mntinfop->mik_timers[2].srtt), 0);
+ hv_store(self, "write_deviate", 13,
+ NEW_UV(mntinfop->mik_timers[2].deviate), 0);
+ hv_store(self, "write_rtxcur", 12,
+ NEW_UV(mntinfop->mik_timers[2].rtxcur), 0);
+ SAVE_UINT32(self, mntinfop, mik_noresponse);
+ SAVE_UINT32(self, mntinfop, mik_failover);
+ SAVE_UINT32(self, mntinfop, mik_remap);
+ SAVE_STRING(self, mntinfop, mik_curserver, strip_str);
+}
+
+/*
+ * The following struct => hash functions are all only present on the sparc
+ * platform, so they are all conditionally compiled depending on __sparc
+ */
+
+/*
+ * Definition in /usr/platform/sun4u/include/vm/hat_sfmmu.h
+ */
+
+#ifdef __sparc
+static void
+save_sfmmu_global_stat(HV *self, kstat_t *kp, int strip_str)
+{
+ struct sfmmu_global_stat *sfmmugp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct sfmmu_global_stat));
+ sfmmugp = (struct sfmmu_global_stat *)(kp->ks_data);
+
+ SAVE_INT32(self, sfmmugp, sf_tsb_exceptions);
+ SAVE_INT32(self, sfmmugp, sf_tsb_raise_exception);
+ SAVE_INT32(self, sfmmugp, sf_pagefaults);
+ SAVE_INT32(self, sfmmugp, sf_uhash_searches);
+ SAVE_INT32(self, sfmmugp, sf_uhash_links);
+ SAVE_INT32(self, sfmmugp, sf_khash_searches);
+ SAVE_INT32(self, sfmmugp, sf_khash_links);
+ SAVE_INT32(self, sfmmugp, sf_swapout);
+ SAVE_INT32(self, sfmmugp, sf_tsb_alloc);
+ SAVE_INT32(self, sfmmugp, sf_tsb_allocfail);
+ SAVE_INT32(self, sfmmugp, sf_tsb_sectsb_create);
+ SAVE_INT32(self, sfmmugp, sf_scd_1sttsb_alloc);
+ SAVE_INT32(self, sfmmugp, sf_scd_2ndtsb_alloc);
+ SAVE_INT32(self, sfmmugp, sf_scd_1sttsb_allocfail);
+ SAVE_INT32(self, sfmmugp, sf_scd_2ndtsb_allocfail);
+ SAVE_INT32(self, sfmmugp, sf_tteload8k);
+ SAVE_INT32(self, sfmmugp, sf_tteload64k);
+ SAVE_INT32(self, sfmmugp, sf_tteload512k);
+ SAVE_INT32(self, sfmmugp, sf_tteload4m);
+ SAVE_INT32(self, sfmmugp, sf_tteload32m);
+ SAVE_INT32(self, sfmmugp, sf_tteload256m);
+ SAVE_INT32(self, sfmmugp, sf_tsb_load8k);
+ SAVE_INT32(self, sfmmugp, sf_tsb_load4m);
+ SAVE_INT32(self, sfmmugp, sf_hblk_hit);
+ SAVE_INT32(self, sfmmugp, sf_hblk8_ncreate);
+ SAVE_INT32(self, sfmmugp, sf_hblk8_nalloc);
+ SAVE_INT32(self, sfmmugp, sf_hblk1_ncreate);
+ SAVE_INT32(self, sfmmugp, sf_hblk1_nalloc);
+ SAVE_INT32(self, sfmmugp, sf_hblk_slab_cnt);
+ SAVE_INT32(self, sfmmugp, sf_hblk_reserve_cnt);
+ SAVE_INT32(self, sfmmugp, sf_hblk_recurse_cnt);
+ SAVE_INT32(self, sfmmugp, sf_hblk_reserve_hit);
+ SAVE_INT32(self, sfmmugp, sf_get_free_success);
+ SAVE_INT32(self, sfmmugp, sf_get_free_throttle);
+ SAVE_INT32(self, sfmmugp, sf_get_free_fail);
+ SAVE_INT32(self, sfmmugp, sf_put_free_success);
+ SAVE_INT32(self, sfmmugp, sf_put_free_fail);
+ SAVE_INT32(self, sfmmugp, sf_pgcolor_conflict);
+ SAVE_INT32(self, sfmmugp, sf_uncache_conflict);
+ SAVE_INT32(self, sfmmugp, sf_unload_conflict);
+ SAVE_INT32(self, sfmmugp, sf_ism_uncache);
+ SAVE_INT32(self, sfmmugp, sf_ism_recache);
+ SAVE_INT32(self, sfmmugp, sf_recache);
+ SAVE_INT32(self, sfmmugp, sf_steal_count);
+ SAVE_INT32(self, sfmmugp, sf_pagesync);
+ SAVE_INT32(self, sfmmugp, sf_clrwrt);
+ SAVE_INT32(self, sfmmugp, sf_pagesync_invalid);
+ SAVE_INT32(self, sfmmugp, sf_kernel_xcalls);
+ SAVE_INT32(self, sfmmugp, sf_user_xcalls);
+ SAVE_INT32(self, sfmmugp, sf_tsb_grow);
+ SAVE_INT32(self, sfmmugp, sf_tsb_shrink);
+ SAVE_INT32(self, sfmmugp, sf_tsb_resize_failures);
+ SAVE_INT32(self, sfmmugp, sf_tsb_reloc);
+ SAVE_INT32(self, sfmmugp, sf_user_vtop);
+ SAVE_INT32(self, sfmmugp, sf_ctx_inv);
+ SAVE_INT32(self, sfmmugp, sf_tlb_reprog_pgsz);
+ SAVE_INT32(self, sfmmugp, sf_region_remap_demap);
+ SAVE_INT32(self, sfmmugp, sf_create_scd);
+ SAVE_INT32(self, sfmmugp, sf_join_scd);
+ SAVE_INT32(self, sfmmugp, sf_leave_scd);
+ SAVE_INT32(self, sfmmugp, sf_destroy_scd);
+}
+#endif
+
+/*
+ * Definition in /usr/platform/sun4u/include/vm/hat_sfmmu.h
+ */
+
+#ifdef __sparc
+static void
+save_sfmmu_tsbsize_stat(HV *self, kstat_t *kp, int strip_str)
+{
+ struct sfmmu_tsbsize_stat *sfmmutp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct sfmmu_tsbsize_stat));
+ sfmmutp = (struct sfmmu_tsbsize_stat *)(kp->ks_data);
+
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_8k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_16k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_32k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_64k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_128k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_256k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_512k);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_1m);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_2m);
+ SAVE_INT32(self, sfmmutp, sf_tsbsz_4m);
+}
+#endif
+
+/*
+ * Definition in /usr/platform/sun4u/include/sys/simmstat.h
+ */
+
+#ifdef __sparc
+static void
+save_simmstat(HV *self, kstat_t *kp, int strip_str)
+{
+ uchar_t *simmstatp;
+ SV *list;
+ int i;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (uchar_t) * SIMM_COUNT);
+
+ list = newSVpv("", 0);
+ for (i = 0, simmstatp = (uchar_t *)(kp->ks_data);
+ i < SIMM_COUNT - 1; i++, simmstatp++) {
+ sv_catpvf(list, "%d,", *simmstatp);
+ }
+ sv_catpvf(list, "%d", *simmstatp);
+ hv_store(self, "status", 6, list, 0);
+}
+#endif
+
+/*
+ * Used by save_temperature to make CSV lists from arrays of
+ * short temperature values
+ */
+
+#ifdef __sparc
+static SV *
+short_array_to_SV(short *shortp, int len)
+{
+ SV *list;
+
+ list = newSVpv("", 0);
+ for (; len > 1; len--, shortp++) {
+ sv_catpvf(list, "%d,", *shortp);
+ }
+ sv_catpvf(list, "%d", *shortp);
+ return (list);
+}
+
+/*
+ * Definition in /usr/platform/sun4u/include/sys/fhc.h
+ */
+
+static void
+save_temperature(HV *self, kstat_t *kp, int strip_str)
+{
+ struct temp_stats *tempsp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (struct temp_stats));
+ tempsp = (struct temp_stats *)(kp->ks_data);
+
+ SAVE_UINT32(self, tempsp, index);
+ hv_store(self, "l1", 2, short_array_to_SV(tempsp->l1, L1_SZ), 0);
+ hv_store(self, "l2", 2, short_array_to_SV(tempsp->l2, L2_SZ), 0);
+ hv_store(self, "l3", 2, short_array_to_SV(tempsp->l3, L3_SZ), 0);
+ hv_store(self, "l4", 2, short_array_to_SV(tempsp->l4, L4_SZ), 0);
+ hv_store(self, "l5", 2, short_array_to_SV(tempsp->l5, L5_SZ), 0);
+ SAVE_INT32(self, tempsp, max);
+ SAVE_INT32(self, tempsp, min);
+ SAVE_INT32(self, tempsp, state);
+ SAVE_INT32(self, tempsp, temp_cnt);
+ SAVE_INT32(self, tempsp, shutdown_cnt);
+ SAVE_INT32(self, tempsp, version);
+ SAVE_INT32(self, tempsp, trend);
+ SAVE_INT32(self, tempsp, override);
+}
+#endif
+
+/*
+ * Not actually defined anywhere - just a short. Yuck.
+ */
+
+#ifdef __sparc
+static void
+save_temp_over(HV *self, kstat_t *kp, int strip_str)
+{
+ short *shortp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == sizeof (short));
+
+ shortp = (short *)(kp->ks_data);
+ hv_store(self, "override", 8, newSViv(*shortp), 0);
+}
+#endif
+
+/*
+ * Defined in /usr/platform/sun4u/include/sys/sysctrl.h
+ * (Well, sort of. Actually there's no structure, just a list of #defines
+ * enumerating *some* of the array indexes.)
+ */
+
+#ifdef __sparc
+static void
+save_ps_shadow(HV *self, kstat_t *kp, int strip_str)
+{
+ uchar_t *ucharp;
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ PERL_ASSERT(kp->ks_data_size == SYS_PS_COUNT);
+
+ ucharp = (uchar_t *)(kp->ks_data);
+ hv_store(self, "core_0", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_1", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_2", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_3", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_4", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_5", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_6", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "core_7", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "pps_0", 5, newSViv(*ucharp++), 0);
+ hv_store(self, "clk_33", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "clk_50", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "v5_p", 4, newSViv(*ucharp++), 0);
+ hv_store(self, "v12_p", 5, newSViv(*ucharp++), 0);
+ hv_store(self, "v5_aux", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "v5_p_pch", 8, newSViv(*ucharp++), 0);
+ hv_store(self, "v12_p_pch", 9, newSViv(*ucharp++), 0);
+ hv_store(self, "v3_pch", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "v5_pch", 6, newSViv(*ucharp++), 0);
+ hv_store(self, "p_fan", 5, newSViv(*ucharp++), 0);
+}
+#endif
+
+/*
+ * Definition in /usr/platform/sun4u/include/sys/fhc.h
+ */
+
+#ifdef __sparc
+static void
+save_fault_list(HV *self, kstat_t *kp, int strip_str)
+{
+ struct ft_list *faultp;
+ int i;
+ char name[KSTAT_STRLEN + 7]; /* room for 999999 faults */
+
+ /* PERL_ASSERT(kp->ks_ndata == 1); */
+ /* PERL_ASSERT(kp->ks_data_size == sizeof (struct ft_list)); */
+
+ for (i = 1, faultp = (struct ft_list *)(kp->ks_data);
+ i <= 999999 && i <= kp->ks_data_size / sizeof (struct ft_list);
+ i++, faultp++) {
+ (void) snprintf(name, sizeof (name), "unit_%d", i);
+ hv_store(self, name, strlen(name), newSViv(faultp->unit), 0);
+ (void) snprintf(name, sizeof (name), "type_%d", i);
+ hv_store(self, name, strlen(name), newSViv(faultp->type), 0);
+ (void) snprintf(name, sizeof (name), "fclass_%d", i);
+ hv_store(self, name, strlen(name), newSViv(faultp->fclass), 0);
+ (void) snprintf(name, sizeof (name), "create_time_%d", i);
+ hv_store(self, name, strlen(name),
+ NEW_UV(faultp->create_time), 0);
+ (void) snprintf(name, sizeof (name), "msg_%d", i);
+ hv_store(self, name, strlen(name), newSVpv(faultp->msg, 0), 0);
+ }
+}
+#endif
+
+/*
+ * We need to be able to find the function corresponding to a particular raw
+ * kstat. To do this we ignore the instance and glue the module and name
+ * together to form a composite key. We can then use the data in the kstat
+ * structure to find the appropriate function. We use a perl hash to manage the
+ * lookup, where the key is "module:name" and the value is a pointer to the
+ * appropriate C function.
+ *
+ * Note that some kstats include the instance number as part of the module
+ * and/or name. This could be construed as a bug. However, to work around this
+ * we omit any digits from the module and name as we build the table in
+ * build_raw_kstat_loopup(), and we remove any digits from the module and name
+ * when we look up the functions in lookup_raw_kstat_fn()
+ */
+
+/*
+ * This function is called when the XS is first dlopen()ed, and builds the
+ * lookup table as described above.
+ */
+
+static void
+build_raw_kstat_lookup()
+ {
+ /* Create new hash */
+ raw_kstat_lookup = newHV();
+
+ SAVE_FNP(raw_kstat_lookup, save_cpu_stat, "cpu_stat:cpu_stat");
+ SAVE_FNP(raw_kstat_lookup, save_var, "unix:var");
+ SAVE_FNP(raw_kstat_lookup, save_flushmeter, "unix:flushmeter");
+ SAVE_FNP(raw_kstat_lookup, save_ncstats, "unix:ncstats");
+ SAVE_FNP(raw_kstat_lookup, save_sysinfo, "unix:sysinfo");
+ SAVE_FNP(raw_kstat_lookup, save_vminfo, "unix:vminfo");
+ SAVE_FNP(raw_kstat_lookup, save_nfs, "nfs:mntinfo");
+#ifdef __sparc
+ SAVE_FNP(raw_kstat_lookup, save_sfmmu_global_stat,
+ "unix:sfmmu_global_stat");
+ SAVE_FNP(raw_kstat_lookup, save_sfmmu_tsbsize_stat,
+ "unix:sfmmu_tsbsize_stat");
+ SAVE_FNP(raw_kstat_lookup, save_simmstat, "unix:simm-status");
+ SAVE_FNP(raw_kstat_lookup, save_temperature, "unix:temperature");
+ SAVE_FNP(raw_kstat_lookup, save_temp_over, "unix:temperature override");
+ SAVE_FNP(raw_kstat_lookup, save_ps_shadow, "unix:ps_shadow");
+ SAVE_FNP(raw_kstat_lookup, save_fault_list, "unix:fault_list");
+#endif
+}
+
+/*
+ * This finds and returns the raw kstat reader function corresponding to the
+ * supplied module and name. If no matching function exists, 0 is returned.
+ */
+
+static kstat_raw_reader_t lookup_raw_kstat_fn(char *module, char *name)
+ {
+ char key[KSTAT_STRLEN * 2];
+ register char *f, *t;
+ SV **entry;
+ kstat_raw_reader_t fnp;
+
+ /* Copy across module & name, removing any digits - see comment above */
+ for (f = module, t = key; *f != '\0'; f++, t++) {
+ while (*f != '\0' && isdigit(*f)) { f++; }
+ *t = *f;
+ }
+ *t++ = ':';
+ for (f = name; *f != '\0'; f++, t++) {
+ while (*f != '\0' && isdigit(*f)) {
+ f++;
+ }
+ *t = *f;
+ }
+ *t = '\0';
+
+ /* look up & return the function, or teturn 0 if not found */
+ if ((entry = hv_fetch(raw_kstat_lookup, key, strlen(key), FALSE)) == 0)
+ {
+ fnp = 0;
+ } else {
+ fnp = (kstat_raw_reader_t)(uintptr_t)SvIV(*entry);
+ }
+ return (fnp);
+}
+
+/*
+ * This module converts the flat list returned by kstat_read() into a perl hash
+ * tree keyed on module, instance, name and statistic. The following functions
+ * provide code to create the nested hashes, and to iterate over them.
+ */
+
+/*
+ * Given module, instance and name keys return a pointer to the hash tied to
+ * the bottommost hash. If the hash already exists, we just return a pointer
+ * to it, otherwise we create the hash and any others also required above it in
+ * the hierarchy. The returned tiehash is blessed into the
+ * Sun::Solaris::Kstat::_Stat class, so that the appropriate TIEHASH methods are
+ * called when the bottommost hash is accessed. If the is_new parameter is
+ * non-null it will be set to TRUE if a new tie has been created, and FALSE if
+ * the tie already existed.
+ */
+
+static HV *
+get_tie(SV *self, char *module, int instance, char *name, int *is_new)
+{
+ char str_inst[11]; /* big enough for up to 10^10 instances */
+ char *key[3]; /* 3 part key: module, instance, name */
+ int k;
+ int new;
+ HV *hash;
+ HV *tie;
+
+ /* Create the keys */
+ (void) snprintf(str_inst, sizeof (str_inst), "%d", instance);
+ key[0] = module;
+ key[1] = str_inst;
+ key[2] = name;
+
+ /* Iteratively descend the tree, creating new hashes as required */
+ hash = (HV *)SvRV(self);
+ for (k = 0; k < 3; k++) {
+ SV **entry;
+
+ SvREADONLY_off(hash);
+ entry = hv_fetch(hash, key[k], strlen(key[k]), TRUE);
+
+ /* If the entry doesn't exist, create it */
+ if (! SvOK(*entry)) {
+ HV *newhash;
+ SV *rv;
+
+ newhash = newHV();
+ rv = newRV_noinc((SV *)newhash);
+ sv_setsv(*entry, rv);
+ SvREFCNT_dec(rv);
+ if (k < 2) {
+ SvREADONLY_on(newhash);
+ }
+ SvREADONLY_on(*entry);
+ SvREADONLY_on(hash);
+ hash = newhash;
+ new = 1;
+
+ /* Otherwise it already existed */
+ } else {
+ SvREADONLY_on(hash);
+ hash = (HV *)SvRV(*entry);
+ new = 0;
+ }
+ }
+
+ /* Create and bless a hash for the tie, if necessary */
+ if (new) {
+ SV *tieref;
+ HV *stash;
+
+ tie = newHV();
+ tieref = newRV_noinc((SV *)tie);
+ stash = gv_stashpv("Sun::Solaris::Kstat::_Stat", TRUE);
+ sv_bless(tieref, stash);
+
+ /* Add TIEHASH magic */
+ hv_magic(hash, (GV *)tieref, 'P');
+ SvREADONLY_on(hash);
+
+ /* Otherwise, just find the existing tied hash */
+ } else {
+ MAGIC *mg;
+
+ mg = mg_find((SV *)hash, 'P');
+ PERL_ASSERTMSG(mg != 0, "get_tie: lost P magic");
+ tie = (HV *)SvRV(mg->mg_obj);
+ }
+ if (is_new) {
+ *is_new = new;
+ }
+ return (tie);
+}
+
+/*
+ * This is an iterator function used to traverse the hash hierarchy and apply
+ * the passed function to the tied hashes at the bottom of the hierarchy. If
+ * any of the callback functions return 0, 0 is returned, otherwise 1
+ */
+
+static int
+apply_to_ties(SV *self, ATTCb_t cb, void *arg)
+{
+ HV *hash1;
+ HE *entry1;
+ long s;
+ int ret;
+
+ hash1 = (HV *)SvRV(self);
+ hv_iterinit(hash1);
+ ret = 1;
+
+ /* Iterate over each module */
+ while (entry1 = hv_iternext(hash1)) {
+ HV *hash2;
+ HE *entry2;
+
+ hash2 = (HV *)SvRV(hv_iterval(hash1, entry1));
+ hv_iterinit(hash2);
+
+ /* Iterate over each module:instance */
+ while (entry2 = hv_iternext(hash2)) {
+ HV *hash3;
+ HE *entry3;
+
+ hash3 = (HV *)SvRV(hv_iterval(hash2, entry2));
+ hv_iterinit(hash3);
+
+ /* Iterate over each module:instance:name */
+ while (entry3 = hv_iternext(hash3)) {
+ HV *hash4;
+ MAGIC *mg;
+ HV *tie;
+
+ /* Get the tie */
+ hash4 = (HV *)SvRV(hv_iterval(hash3, entry3));
+ mg = mg_find((SV *)hash4, 'P');
+ PERL_ASSERTMSG(mg != 0,
+ "apply_to_ties: lost P magic");
+
+ /* Apply the callback */
+ if (! cb((HV *)SvRV(mg->mg_obj), arg)) {
+ ret = 0;
+ }
+ }
+ }
+ }
+ return (ret);
+}
+
+/*
+ * Mark this HV as valid - used by update() when pruning deleted kstat nodes
+ */
+
+static int
+set_valid(HV *self, void *arg)
+{
+ MAGIC *mg;
+
+ mg = mg_find((SV *)self, '~');
+ PERL_ASSERTMSG(mg != 0, "set_valid: lost ~ magic");
+ ((KstatInfo_t *)SvPVX(mg->mg_obj))->valid = (int)arg;
+ return (1);
+}
+
+/*
+ * Prune invalid kstat nodes. This is called when kstat_chain_update() detects
+ * that the kstat chain has been updated. This removes any hash tree entries
+ * that no longer have a corresponding kstat. If del is non-null it will be
+ * set to the keys of the deleted kstat nodes, if any. If any entries are
+ * deleted 1 will be retured, otherwise 0
+ */
+
+static int
+prune_invalid(SV *self, AV *del)
+{
+ HV *hash1;
+ HE *entry1;
+ STRLEN klen;
+ char *module, *instance, *name, *key;
+ int ret;
+
+ hash1 = (HV *)SvRV(self);
+ hv_iterinit(hash1);
+ ret = 0;
+
+ /* Iterate over each module */
+ while (entry1 = hv_iternext(hash1)) {
+ HV *hash2;
+ HE *entry2;
+
+ module = HePV(entry1, PL_na);
+ hash2 = (HV *)SvRV(hv_iterval(hash1, entry1));
+ hv_iterinit(hash2);
+
+ /* Iterate over each module:instance */
+ while (entry2 = hv_iternext(hash2)) {
+ HV *hash3;
+ HE *entry3;
+
+ instance = HePV(entry2, PL_na);
+ hash3 = (HV *)SvRV(hv_iterval(hash2, entry2));
+ hv_iterinit(hash3);
+
+ /* Iterate over each module:instance:name */
+ while (entry3 = hv_iternext(hash3)) {
+ HV *hash4;
+ MAGIC *mg;
+ HV *tie;
+
+ name = HePV(entry3, PL_na);
+ hash4 = (HV *)SvRV(hv_iterval(hash3, entry3));
+ mg = mg_find((SV *)hash4, 'P');
+ PERL_ASSERTMSG(mg != 0,
+ "prune_invalid: lost P magic");
+ tie = (HV *)SvRV(mg->mg_obj);
+ mg = mg_find((SV *)tie, '~');
+ PERL_ASSERTMSG(mg != 0,
+ "prune_invalid: lost ~ magic");
+
+ /* If this is marked as invalid, prune it */
+ if (((KstatInfo_t *)SvPVX(
+ (SV *)mg->mg_obj))->valid == FALSE) {
+ SvREADONLY_off(hash3);
+ key = HePV(entry3, klen);
+ hv_delete(hash3, key, klen, G_DISCARD);
+ SvREADONLY_on(hash3);
+ if (del) {
+ av_push(del,
+ newSVpvf("%s:%s:%s",
+ module, instance, name));
+ }
+ ret = 1;
+ }
+ }
+
+ /* If the module:instance:name hash is empty prune it */
+ if (HvKEYS(hash3) == 0) {
+ SvREADONLY_off(hash2);
+ key = HePV(entry2, klen);
+ hv_delete(hash2, key, klen, G_DISCARD);
+ SvREADONLY_on(hash2);
+ }
+ }
+ /* If the module:instance hash is empty prune it */
+ if (HvKEYS(hash2) == 0) {
+ SvREADONLY_off(hash1);
+ key = HePV(entry1, klen);
+ hv_delete(hash1, key, klen, G_DISCARD);
+ SvREADONLY_on(hash1);
+ }
+ }
+ return (ret);
+}
+
+/*
+ * Named kstats are returned as a list of key/values. This function converts
+ * such a list into the equivalent perl datatypes, and stores them in the passed
+ * hash.
+ */
+
+static void
+save_named(HV *self, kstat_t *kp, int strip_str)
+{
+ kstat_named_t *knp;
+ int n;
+ SV* value;
+
+ for (n = kp->ks_ndata, knp = KSTAT_NAMED_PTR(kp); n > 0; n--, knp++) {
+ switch (knp->data_type) {
+ case KSTAT_DATA_CHAR:
+ value = newSVpv(knp->value.c, strip_str ?
+ strlen(knp->value.c) : sizeof (knp->value.c));
+ break;
+ case KSTAT_DATA_INT32:
+ value = newSViv(knp->value.i32);
+ break;
+ case KSTAT_DATA_UINT32:
+ value = NEW_UV(knp->value.ui32);
+ break;
+ case KSTAT_DATA_INT64:
+ value = NEW_UV(knp->value.i64);
+ break;
+ case KSTAT_DATA_UINT64:
+ value = NEW_UV(knp->value.ui64);
+ break;
+ case KSTAT_DATA_STRING:
+ if (KSTAT_NAMED_STR_PTR(knp) == NULL)
+ value = newSVpv("null", sizeof ("null") - 1);
+ else
+ value = newSVpv(KSTAT_NAMED_STR_PTR(knp),
+ KSTAT_NAMED_STR_BUFLEN(knp) -1);
+ break;
+ default:
+ PERL_ASSERTMSG(0, "kstat_read: invalid data type");
+ break;
+ }
+ hv_store(self, knp->name, strlen(knp->name), value, 0);
+ }
+}
+
+/*
+ * Save kstat interrupt statistics
+ */
+
+static void
+save_intr(HV *self, kstat_t *kp, int strip_str)
+{
+ kstat_intr_t *kintrp;
+ int i;
+ static char *intr_names[] =
+ { "hard", "soft", "watchdog", "spurious", "multiple_service" };
+
+ PERL_ASSERT(kp->ks_ndata == 1);
+ PERL_ASSERT(kp->ks_data_size == sizeof (kstat_intr_t));
+ kintrp = KSTAT_INTR_PTR(kp);
+
+ for (i = 0; i < KSTAT_NUM_INTRS; i++) {
+ hv_store(self, intr_names[i], strlen(intr_names[i]),
+ NEW_UV(kintrp->intrs[i]), 0);
+ }
+}
+
+/*
+ * Save IO statistics
+ */
+
+static void
+save_io(HV *self, kstat_t *kp, int strip_str)
+{
+ kstat_io_t *kiop;
+
+ PERL_ASSERT(kp->ks_ndata == 1);
+ PERL_ASSERT(kp->ks_data_size == sizeof (kstat_io_t));
+ kiop = KSTAT_IO_PTR(kp);
+ SAVE_UINT64(self, kiop, nread);
+ SAVE_UINT64(self, kiop, nwritten);
+ SAVE_UINT32(self, kiop, reads);
+ SAVE_UINT32(self, kiop, writes);
+ SAVE_HRTIME(self, kiop, wtime);
+ SAVE_HRTIME(self, kiop, wlentime);
+ SAVE_HRTIME(self, kiop, wlastupdate);
+ SAVE_HRTIME(self, kiop, rtime);
+ SAVE_HRTIME(self, kiop, rlentime);
+ SAVE_HRTIME(self, kiop, rlastupdate);
+ SAVE_UINT32(self, kiop, wcnt);
+ SAVE_UINT32(self, kiop, rcnt);
+}
+
+/*
+ * Save timer statistics
+ */
+
+static void
+save_timer(HV *self, kstat_t *kp, int strip_str)
+{
+ kstat_timer_t *ktimerp;
+
+ PERL_ASSERT(kp->ks_ndata == 1);
+ PERL_ASSERT(kp->ks_data_size == sizeof (kstat_timer_t));
+ ktimerp = KSTAT_TIMER_PTR(kp);
+ SAVE_STRING(self, ktimerp, name, strip_str);
+ SAVE_UINT64(self, ktimerp, num_events);
+ SAVE_HRTIME(self, ktimerp, elapsed_time);
+ SAVE_HRTIME(self, ktimerp, min_time);
+ SAVE_HRTIME(self, ktimerp, max_time);
+ SAVE_HRTIME(self, ktimerp, start_time);
+ SAVE_HRTIME(self, ktimerp, stop_time);
+}
+
+/*
+ * Read kstats and copy into the supplied perl hash structure. If refresh is
+ * true, this function is being called as part of the update() method. In this
+ * case it is only necessary to read the kstats if they have previously been
+ * accessed (kip->read == TRUE). If refresh is false, this function is being
+ * called prior to returning a value to the caller. In this case, it is only
+ * necessary to read the kstats if they have not previously been read. If the
+ * kstat_read() fails, 0 is returned, otherwise 1
+ */
+
+static int
+read_kstats(HV *self, int refresh)
+{
+ MAGIC *mg;
+ KstatInfo_t *kip;
+ kstat_raw_reader_t fnp;
+
+ /* Find the MAGIC KstatInfo_t data structure */
+ mg = mg_find((SV *)self, '~');
+ PERL_ASSERTMSG(mg != 0, "read_kstats: lost ~ magic");
+ kip = (KstatInfo_t *)SvPVX(mg->mg_obj);
+
+ /* Return early if we don't need to actually read the kstats */
+ if ((refresh && ! kip->read) || (! refresh && kip->read)) {
+ return (1);
+ }
+
+ /* Read the kstats and return 0 if this fails */
+ if (kstat_read(kip->kstat_ctl, kip->kstat, NULL) < 0) {
+ return (0);
+ }
+
+ /* Save the read data */
+ hv_store(self, "snaptime", 8, NEW_HRTIME(kip->kstat->ks_snaptime), 0);
+ switch (kip->kstat->ks_type) {
+ case KSTAT_TYPE_RAW:
+ if ((fnp = lookup_raw_kstat_fn(kip->kstat->ks_module,
+ kip->kstat->ks_name)) != 0) {
+ fnp(self, kip->kstat, kip->strip_str);
+ }
+ break;
+ case KSTAT_TYPE_NAMED:
+ save_named(self, kip->kstat, kip->strip_str);
+ break;
+ case KSTAT_TYPE_INTR:
+ save_intr(self, kip->kstat, kip->strip_str);
+ break;
+ case KSTAT_TYPE_IO:
+ save_io(self, kip->kstat, kip->strip_str);
+ break;
+ case KSTAT_TYPE_TIMER:
+ save_timer(self, kip->kstat, kip->strip_str);
+ break;
+ default:
+ PERL_ASSERTMSG(0, "read_kstats: illegal kstat type");
+ break;
+ }
+ kip->read = TRUE;
+ return (1);
+}
+
+/*
+ * The XS code exported to perl is below here. Note that the XS preprocessor
+ * has its own commenting syntax, so all comments from this point on are in
+ * that form.
+ */
+
+/* The following XS methods are the ABI of the Sun::Solaris::Kstat package */
+
+MODULE = Sun::Solaris::Kstat PACKAGE = Sun::Solaris::Kstat
+PROTOTYPES: ENABLE
+
+ # Create the raw kstat to store function lookup table on load
+BOOT:
+ build_raw_kstat_lookup();
+
+ #
+ # The Sun::Solaris::Kstat constructor. This builds the nested
+ # name::instance::module hash structure, but doesn't actually read the
+ # underlying kstats. This is done on demand by the TIEHASH methods in
+ # Sun::Solaris::Kstat::_Stat
+ #
+
+SV*
+new(class, ...)
+ char *class;
+PREINIT:
+ HV *stash;
+ kstat_ctl_t *kc;
+ SV *kcsv;
+ kstat_t *kp;
+ KstatInfo_t kstatinfo;
+ int sp, strip_str;
+CODE:
+ /* Check we have an even number of arguments, excluding the class */
+ sp = 1;
+ if (((items - sp) % 2) != 0) {
+ croak(DEBUG_ID ": new: invalid number of arguments");
+ }
+
+ /* Process any (name => value) arguments */
+ strip_str = 0;
+ while (sp < items) {
+ SV *name, *value;
+
+ name = ST(sp);
+ sp++;
+ value = ST(sp);
+ sp++;
+ if (strcmp(SvPVX(name), "strip_strings") == 0) {
+ strip_str = SvTRUE(value);
+ } else {
+ croak(DEBUG_ID ": new: invalid parameter name '%s'",
+ SvPVX(name));
+ }
+ }
+
+ /* Open the kstats handle */
+ if ((kc = kstat_open()) == 0) {
+ XSRETURN_UNDEF;
+ }
+
+ /* Create a blessed hash ref */
+ RETVAL = (SV *)newRV_noinc((SV *)newHV());
+ stash = gv_stashpv(class, TRUE);
+ sv_bless(RETVAL, stash);
+
+ /* Create a place to save the KstatInfo_t structure */
+ kcsv = newSVpv((char *)&kc, sizeof (kc));
+ sv_magic(SvRV(RETVAL), kcsv, '~', 0, 0);
+ SvREFCNT_dec(kcsv);
+
+ /* Initialise the KstatsInfo_t structure */
+ kstatinfo.read = FALSE;
+ kstatinfo.valid = TRUE;
+ kstatinfo.strip_str = strip_str;
+ kstatinfo.kstat_ctl = kc;
+
+ /* Scan the kstat chain, building hash entries for the kstats */
+ for (kp = kc->kc_chain; kp != 0; kp = kp->ks_next) {
+ HV *tie;
+ SV *kstatsv;
+
+ /* Don't bother storing the kstat headers */
+ if (strncmp(kp->ks_name, "kstat_", 6) == 0) {
+ continue;
+ }
+
+ /* Don't bother storing raw stats we don't understand */
+ if (kp->ks_type == KSTAT_TYPE_RAW &&
+ lookup_raw_kstat_fn(kp->ks_module, kp->ks_name) == 0) {
+#ifdef REPORT_UNKNOWN
+ (void) fprintf(stderr,
+ "Unknown kstat type %s:%d:%s - %d of size %d\n",
+ kp->ks_module, kp->ks_instance, kp->ks_name,
+ kp->ks_ndata, kp->ks_data_size);
+#endif
+ continue;
+ }
+
+ /* Create a 3-layer hash hierarchy - module.instance.name */
+ tie = get_tie(RETVAL, kp->ks_module, kp->ks_instance,
+ kp->ks_name, 0);
+
+ /* Save the data necessary to read the kstat info on demand */
+ hv_store(tie, "class", 5, newSVpv(kp->ks_class, 0), 0);
+ hv_store(tie, "crtime", 6, NEW_HRTIME(kp->ks_crtime), 0);
+ kstatinfo.kstat = kp;
+ kstatsv = newSVpv((char *)&kstatinfo, sizeof (kstatinfo));
+ sv_magic((SV *)tie, kstatsv, '~', 0, 0);
+ SvREFCNT_dec(kstatsv);
+ }
+ SvREADONLY_on(SvRV(RETVAL));
+ /* SvREADONLY_on(RETVAL); */
+OUTPUT:
+ RETVAL
+
+ #
+ # Update the perl hash structure so that it is in line with the kernel kstats
+ # data. Only kstats athat have previously been accessed are read,
+ #
+
+ # Scalar context: true/false
+ # Array context: (\@added, \@deleted)
+void
+update(self)
+ SV* self;
+PREINIT:
+ MAGIC *mg;
+ kstat_ctl_t *kc;
+ kstat_t *kp;
+ int ret;
+ AV *add, *del;
+PPCODE:
+ /* Find the hidden KstatInfo_t structure */
+ mg = mg_find(SvRV(self), '~');
+ PERL_ASSERTMSG(mg != 0, "update: lost ~ magic");
+ kc = *(kstat_ctl_t **)SvPVX(mg->mg_obj);
+
+ /* Update the kstat chain, and return immediately on error. */
+ if ((ret = kstat_chain_update(kc)) == -1) {
+ if (GIMME_V == G_ARRAY) {
+ EXTEND(SP, 2);
+ PUSHs(sv_newmortal());
+ PUSHs(sv_newmortal());
+ } else {
+ EXTEND(SP, 1);
+ PUSHs(sv_2mortal(newSViv(ret)));
+ }
+ }
+
+ /* Create the arrays to be returned if in an array context */
+ if (GIMME_V == G_ARRAY) {
+ add = newAV();
+ del = newAV();
+ } else {
+ add = 0;
+ del = 0;
+ }
+
+ /*
+ * If the kstat chain hasn't changed we can just reread any stats
+ * that have already been read
+ */
+ if (ret == 0) {
+ if (! apply_to_ties(self, (ATTCb_t)read_kstats, (void *)TRUE)) {
+ if (GIMME_V == G_ARRAY) {
+ EXTEND(SP, 2);
+ PUSHs(sv_2mortal(newRV_noinc((SV *)add)));
+ PUSHs(sv_2mortal(newRV_noinc((SV *)del)));
+ } else {
+ EXTEND(SP, 1);
+ PUSHs(sv_2mortal(newSViv(-1)));
+ }
+ }
+
+ /*
+ * Otherwise we have to update the Perl structure so that it is in
+ * agreement with the new kstat chain. We do this in such a way as to
+ * retain all the existing structures, just adding or deleting the
+ * bare minimum.
+ */
+ } else {
+ KstatInfo_t kstatinfo;
+
+ /*
+ * Step 1: set the 'invalid' flag on each entry
+ */
+ apply_to_ties(self, &set_valid, (void *)FALSE);
+
+ /*
+ * Step 2: Set the 'valid' flag on all entries still in the
+ * kernel kstat chain
+ */
+ kstatinfo.read = FALSE;
+ kstatinfo.valid = TRUE;
+ kstatinfo.kstat_ctl = kc;
+ for (kp = kc->kc_chain; kp != 0; kp = kp->ks_next) {
+ int new;
+ HV *tie;
+
+ /* Don't bother storing the kstat headers or types */
+ if (strncmp(kp->ks_name, "kstat_", 6) == 0) {
+ continue;
+ }
+
+ /* Don't bother storing raw stats we don't understand */
+ if (kp->ks_type == KSTAT_TYPE_RAW &&
+ lookup_raw_kstat_fn(kp->ks_module, kp->ks_name)
+ == 0) {
+#ifdef REPORT_UNKNOWN
+ (void) printf("Unknown kstat type %s:%d:%s "
+ "- %d of size %d\n", kp->ks_module,
+ kp->ks_instance, kp->ks_name,
+ kp->ks_ndata, kp->ks_data_size);
+#endif
+ continue;
+ }
+
+ /* Find the tied hash associated with the kstat entry */
+ tie = get_tie(self, kp->ks_module, kp->ks_instance,
+ kp->ks_name, &new);
+
+ /* If newly created store the associated kstat info */
+ if (new) {
+ SV *kstatsv;
+
+ /*
+ * Save the data necessary to read the kstat
+ * info on demand
+ */
+ hv_store(tie, "class", 5,
+ newSVpv(kp->ks_class, 0), 0);
+ hv_store(tie, "crtime", 6,
+ NEW_HRTIME(kp->ks_crtime), 0);
+ kstatinfo.kstat = kp;
+ kstatsv = newSVpv((char *)&kstatinfo,
+ sizeof (kstatinfo));
+ sv_magic((SV *)tie, kstatsv, '~', 0, 0);
+ SvREFCNT_dec(kstatsv);
+
+ /* Save the key on the add list, if required */
+ if (GIMME_V == G_ARRAY) {
+ av_push(add, newSVpvf("%s:%d:%s",
+ kp->ks_module, kp->ks_instance,
+ kp->ks_name));
+ }
+
+ /* If the stats already exist, just update them */
+ } else {
+ MAGIC *mg;
+ KstatInfo_t *kip;
+
+ /* Find the hidden KstatInfo_t */
+ mg = mg_find((SV *)tie, '~');
+ PERL_ASSERTMSG(mg != 0, "update: lost ~ magic");
+ kip = (KstatInfo_t *)SvPVX(mg->mg_obj);
+
+ /* Mark the tie as valid */
+ kip->valid = TRUE;
+
+ /* Re-save the kstat_t pointer. If the kstat
+ * has been deleted and re-added since the last
+ * update, the address of the kstat structure
+ * will have changed, even though the kstat will
+ * still live at the same place in the perl
+ * hash tree structure.
+ */
+ kip->kstat = kp;
+
+ /* Reread the stats, if read previously */
+ read_kstats(tie, TRUE);
+ }
+ }
+
+ /*
+ *Step 3: Delete any entries still marked as 'invalid'
+ */
+ ret = prune_invalid(self, del);
+
+ }
+ if (GIMME_V == G_ARRAY) {
+ EXTEND(SP, 2);
+ PUSHs(sv_2mortal(newRV_noinc((SV *)add)));
+ PUSHs(sv_2mortal(newRV_noinc((SV *)del)));
+ } else {
+ EXTEND(SP, 1);
+ PUSHs(sv_2mortal(newSViv(ret)));
+ }
+
+
+ #
+ # Destructor. Closes the kstat connection
+ #
+
+void
+DESTROY(self)
+ SV *self;
+PREINIT:
+ MAGIC *mg;
+ kstat_ctl_t *kc;
+CODE:
+ mg = mg_find(SvRV(self), '~');
+ PERL_ASSERTMSG(mg != 0, "DESTROY: lost ~ magic");
+ kc = *(kstat_ctl_t **)SvPVX(mg->mg_obj);
+ if (kstat_close(kc) != 0) {
+ croak(DEBUG_ID ": kstat_close: failed");
+ }
+
+ #
+ # The following XS methods implement the TIEHASH mechanism used to update the
+ # kstats hash structure. These are blessed into a package that isn't
+ # visible to callers of the Sun::Solaris::Kstat module
+ #
+
+MODULE = Sun::Solaris::Kstat PACKAGE = Sun::Solaris::Kstat::_Stat
+PROTOTYPES: ENABLE
+
+ #
+ # If a value has already been read, return it. Otherwise read the appropriate
+ # kstat and then return the value
+ #
+
+SV*
+FETCH(self, key)
+ SV* self;
+ SV* key;
+PREINIT:
+ char *k;
+ STRLEN klen;
+ SV **value;
+CODE:
+ self = SvRV(self);
+ k = SvPV(key, klen);
+ if (strNE(k, "class") && strNE(k, "crtime")) {
+ read_kstats((HV *)self, FALSE);
+ }
+ value = hv_fetch((HV *)self, k, klen, FALSE);
+ if (value) {
+ RETVAL = *value; SvREFCNT_inc(RETVAL);
+ } else {
+ RETVAL = &PL_sv_undef;
+ }
+OUTPUT:
+ RETVAL
+
+ #
+ # Save the passed value into the kstat hash. Read the appropriate kstat first,
+ # if necessary. Note that this DOES NOT update the underlying kernel kstat
+ # structure.
+ #
+
+SV*
+STORE(self, key, value)
+ SV* self;
+ SV* key;
+ SV* value;
+PREINIT:
+ char *k;
+ STRLEN klen;
+CODE:
+ self = SvRV(self);
+ k = SvPV(key, klen);
+ if (strNE(k, "class") && strNE(k, "crtime")) {
+ read_kstats((HV *)self, FALSE);
+ }
+ SvREFCNT_inc(value);
+ RETVAL = *(hv_store((HV *)self, k, klen, value, 0));
+ SvREFCNT_inc(RETVAL);
+OUTPUT:
+ RETVAL
+
+ #
+ # Check for the existence of the passed key. Read the kstat first if necessary
+ #
+
+bool
+EXISTS(self, key)
+ SV* self;
+ SV* key;
+PREINIT:
+ char *k;
+CODE:
+ self = SvRV(self);
+ k = SvPV(key, PL_na);
+ if (strNE(k, "class") && strNE(k, "crtime")) {
+ read_kstats((HV *)self, FALSE);
+ }
+ RETVAL = hv_exists_ent((HV *)self, key, 0);
+OUTPUT:
+ RETVAL
+
+
+ #
+ # Hash iterator initialisation. Read the kstats if necessary.
+ #
+
+SV*
+FIRSTKEY(self)
+ SV* self;
+PREINIT:
+ HE *he;
+PPCODE:
+ self = SvRV(self);
+ read_kstats((HV *)self, FALSE);
+ hv_iterinit((HV *)self);
+ if (he = hv_iternext((HV *)self)) {
+ EXTEND(SP, 1);
+ PUSHs(hv_iterkeysv(he));
+ }
+
+ #
+ # Return hash iterator next value. Read the kstats if necessary.
+ #
+
+SV*
+NEXTKEY(self, lastkey)
+ SV* self;
+ SV* lastkey;
+PREINIT:
+ HE *he;
+PPCODE:
+ self = SvRV(self);
+ if (he = hv_iternext((HV *)self)) {
+ EXTEND(SP, 1);
+ PUSHs(hv_iterkeysv(he));
+ }
+
+
+ #
+ # Delete the specified hash entry.
+ #
+
+SV*
+DELETE(self, key)
+ SV *self;
+ SV *key;
+CODE:
+ self = SvRV(self);
+ RETVAL = hv_delete_ent((HV *)self, key, 0, 0);
+ if (RETVAL) {
+ SvREFCNT_inc(RETVAL);
+ } else {
+ RETVAL = &PL_sv_undef;
+ }
+OUTPUT:
+ RETVAL
+
+ #
+ # Clear the entire hash. This will stop any update() calls rereading this
+ # kstat until it is accessed again.
+ #
+
+void
+CLEAR(self)
+ SV* self;
+PREINIT:
+ MAGIC *mg;
+ KstatInfo_t *kip;
+CODE:
+ self = SvRV(self);
+ hv_clear((HV *)self);
+ mg = mg_find(self, '~');
+ PERL_ASSERTMSG(mg != 0, "CLEAR: lost ~ magic");
+ kip = (KstatInfo_t *)SvPVX(mg->mg_obj);
+ kip->read = FALSE;
+ kip->valid = TRUE;
+ hv_store((HV *)self, "class", 5, newSVpv(kip->kstat->ks_class, 0), 0);
+ hv_store((HV *)self, "crtime", 6, NEW_HRTIME(kip->kstat->ks_crtime), 0);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/perl/intrd.pl Mon Apr 19 14:25:41 2010 -0400
@@ -0,0 +1,1408 @@
+#!/usr/perl5/bin/perl
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+require 5.8.4;
+use strict;
+use warnings;
+use POSIX;
+use File::Basename("basename");
+
+my $cmdname = basename($0);
+
+my $using_scengen = 0; # 1 if using scenario simulator
+my $debug = 0;
+
+my $normal_sleeptime = 10; # time to sleep between samples
+my $idle_sleeptime = 45; # time to sleep when idle
+my $onecpu_sleeptime = (60 * 15); # used if only 1 CPU on system
+my $sleeptime = $normal_sleeptime; # either normal_ or idle_ or onecpu_
+
+my $idle_intrload = .1; # idle if interrupt load < 10%
+
+my $timerange_toohi = .01;
+my $statslen = 60; # time period (in secs) to keep in @deltas
+
+
+# Parse arguments. intrd does not accept any public arguments; the two
+# arguments below are meant for testing purposes. -D generates a significant
+# amount of syslog output. -S <filename> loads the filename as a perl
+# script. That file is expected to implement a kstat "simulator" which
+# can be used to feed information to intrd and verify intrd's responses.
+
+while ($_ = shift @ARGV) {
+ if ($_ eq "-S" && $#ARGV != -1) {
+ $using_scengen = 1;
+ do $ARGV[0]; # load simulator
+ shift @ARGV;
+ } elsif ($_ eq "-D") {
+ $debug = 1;
+ }
+}
+
+if ($using_scengen == 0) {
+ require Sun::Solaris::Kstat;
+ require Sun::Solaris::Intrs;
+ import Sun::Solaris::Intrs(qw(intrmove is_pcplusmp));
+ require Sys::Syslog;
+ import Sys::Syslog;
+ openlog($cmdname, 'pid', 'daemon');
+ setlogmask(Sys::Syslog::LOG_UPTO($debug > 0 ? &Sys::Syslog::LOG_DEBUG :
+ &Sys::Syslog::LOG_INFO));
+}
+
+my $asserted = 0;
+my $assert_level = 'debug'; # syslog level for assertion failures
+sub VERIFY($@)
+{
+ my $bad = (shift() == 0); # $_[0] == 0 means assert failed
+ if ($bad) {
+ my $msg = shift();
+ syslog($assert_level, "VERIFY: $msg", @_);
+ $asserted++;
+ }
+ return ($bad);
+}
+
+
+
+
+sub getstat($$);
+sub generate_delta($$);
+sub compress_deltas($);
+sub dumpdelta($);
+
+sub goodness($);
+sub imbalanced($$);
+sub do_reconfig($);
+
+sub goodness_cpu($$); # private function
+sub move_intr($$$$); # private function
+sub ivecs_to_string(@); # private function
+sub do_find_goal($$$$); # private function
+sub find_goal($$); # private function
+sub do_reconfig_cpu2cpu($$$$); # private function
+sub do_reconfig_cpu($$$); # private function
+
+
+#
+# What follow are the basic data structures routines of intrd.
+#
+# getstat() is responsible for reading the kstats and generating a "stat" hash.
+#
+# generate_delta() is responsible for taking two "stat" hashes and creating
+# a new "delta" hash that represents what has changed over time.
+#
+# compress_deltas() is responsible for taking a list of deltas and generating
+# a single delta hash that encompasses all the time periods described by the
+# deltas.
+
+
+#
+# getstat() is handed a reference to a kstat and generates a hash, returned
+# by reference, containing all the fields from the kstats which we need.
+# If it returns the scalar 0, it failed to gather the kstats, and the caller
+# should react accordingly.
+#
+# getstat() is also responsible for maintaining a reasonable $sleeptime.
+#
+# {"snaptime"} kstat's snaptime
+# {<cpuid>} one hash reference per online cpu
+# ->{"tot"} == cpu:<cpuid>:sys:cpu_nsec_{user + kernel + idle}
+# ->{"crtime"} == cpu:<cpuid>:sys:crtime
+# ->{"ivecs"}
+# ->{<cookie#>} iterates over pci_intrs::<nexus>:cookie
+# ->{"time"} == pci_intrs:<ivec#>:<nexus>:time (in nsec)
+# ->{"pil"} == pci_intrs:<ivec#>:<nexus>:pil
+# ->{"crtime"} == pci_intrs:<ivec#>:<nexus>:crtime
+# ->{"ino"} == pci_intrs:<ivec#>:<nexus>:ino
+# ->{"num_ino"} == num inos of single device instance sharing this entry
+# Will be > 1 on pcplusmp X86 systems for devices
+# with multiple MSI interrupts.
+# ->{"buspath"} == pci_intrs:<ivec#>:<nexus>:buspath
+# ->{"name"} == pci_intrs:<ivec#>:<nexus>:name
+# ->{"ihs"} == pci_intrs:<ivec#>:<nexus>:ihs
+#
+
+sub getstat($$)
+{
+ my ($ks, $pcplusmp_sys) = @_;
+
+ my $cpucnt = 0;
+ my %stat = ();
+ my ($minsnap, $maxsnap);
+
+ # Hash of hash which matches (MSI device, ino) combos to kstats.
+ my %msidevs = ();
+
+ # kstats are not generated atomically. Each kstat hierarchy will
+ # have been generated within the kernel at a different time. On a
+ # thrashing system, we may not run quickly enough in order to get
+ # coherent kstat timing information across all the kstats. To
+ # determine if this is occurring, $minsnap/$maxsnap are used to
+ # find the breadth between the first and last snaptime of all the
+ # kstats we access. $maxsnap - $minsnap roughly represents the
+ # total time taken up in getstat(). If this time approaches the
+ # time between snapshots, our results may not be useful.
+
+ $minsnap = -1; # snaptime is always a positive number
+ $maxsnap = $minsnap;
+
+ # Iterate over the cpus in cpu:<cpuid>::. Check
+ # cpu_info:<cpuid>:cpu_info<cpuid>:state to make sure the
+ # processor is "on-line". If not, it isn't accepting interrupts
+ # and doesn't concern us.
+ #
+ # Record cpu:<cpuid>:sys:snaptime, and check $minsnap/$maxsnap.
+
+ while (my ($cpu, $cpst) = each %{$ks->{cpu}}) {
+ next if !exists($ks->{cpu_info}{$cpu}{"cpu_info$cpu"}{state});
+ #"state" fld of kstat w/
+ # modname inst name-"cpuinfo0"
+ my $state = $ks->{cpu_info}{$cpu}{"cpu_info$cpu"}{state};
+ next if ($state !~ /^on-line\0/);
+ my $cpu_sys = $cpst->{sys};
+
+ $stat{$cpu}{tot} = ($cpu_sys->{cpu_nsec_idle} +
+ $cpu_sys->{cpu_nsec_user} +
+ $cpu_sys->{cpu_nsec_kernel});
+ $stat{$cpu}{crtime} = $cpu_sys->{crtime};
+ $stat{$cpu}{ivecs} = {};
+
+ if ($minsnap == -1 || $cpu_sys->{snaptime} < $minsnap) {
+ $minsnap = $cpu_sys->{snaptime};
+ }
+ if ($cpu_sys->{snaptime} > $maxsnap) {
+ $maxsnap = $cpu_sys->{snaptime};
+ }
+ $cpucnt++;
+ }
+
+ if ($cpucnt <= 1) {
+ $sleeptime = $onecpu_sleeptime;
+ return (0); # nothing to do with 1 CPU
+ }
+
+ # Iterate over the ivecs. If the cpu is not on-line, ignore the
+ # ivecs mapped to it, if any.
+ #
+ # Record pci_intrs:{inum}:<nexus>:time, snaptime, crtime, pil,
+ # ino, name, and buspath. Check $minsnap/$maxsnap.
+
+ foreach my $inst (values(%{$ks->{pci_intrs}})) {
+ my $intrcfg = (values(%$inst))[0];
+ my $cpu = $intrcfg->{cpu};
+
+ next unless exists $stat{$cpu};
+ next if ($intrcfg->{type} =~ /^disabled\0/);
+
+ # Perl looks beyond NULL chars in pattern matching.
+ # Truncate name field at the first NULL
+ $intrcfg->{name} =~ s/\0.*$//;
+
+ if ($intrcfg->{snaptime} < $minsnap) {
+ $minsnap = $intrcfg->{snaptime};
+ } elsif ($intrcfg->{snaptime} > $maxsnap) {
+ $maxsnap = $intrcfg->{snaptime};
+ }
+
+ my $cookie = "$intrcfg->{buspath} $intrcfg->{ino}";
+ if (exists $stat{$cpu}{ivecs}{$cookie}) {
+ my $cookiestats = $stat{$cpu}{ivecs}{$cookie};
+
+ $cookiestats->{time} += $intrcfg->{time};
+ $cookiestats->{name} .= "/$intrcfg->{name}";
+
+ # If this new interrupt sharing $cookie represents a
+ # change from an earlier getstat, make sure that
+ # generate_delta will see the change by setting
+ # crtime to the most recent crtime of its components.
+
+ if ($intrcfg->{crtime} > $cookiestats->{crtime}) {
+ $cookiestats->{crtime} = $intrcfg->{crtime};
+ }
+ $cookiestats->{ihs}++;
+ next;
+ }
+ $stat{$cpu}{ivecs}{$cookie}{time} = $intrcfg->{time};
+ $stat{$cpu}{ivecs}{$cookie}{crtime} = $intrcfg->{crtime};
+ $stat{$cpu}{ivecs}{$cookie}{pil} = $intrcfg->{pil};
+ $stat{$cpu}{ivecs}{$cookie}{ino} = $intrcfg->{ino};
+ $stat{$cpu}{ivecs}{$cookie}{num_ino} = 1;
+ $stat{$cpu}{ivecs}{$cookie}{buspath} = $intrcfg->{buspath};
+ $stat{$cpu}{ivecs}{$cookie}{name} = $intrcfg->{name};
+ $stat{$cpu}{ivecs}{$cookie}{ihs} = 1;
+
+ if ($pcplusmp_sys && ($intrcfg->{type} =~ /^msi\0/)) {
+ if (!(exists($msidevs{$intrcfg->{name}}))) {
+ $msidevs{$intrcfg->{name}} = {};
+ }
+ $msidevs{$intrcfg->{name}}{$intrcfg->{ino}} =
+ \$stat{$cpu}{ivecs}{$cookie};
+ }
+ }
+
+ # All MSI interrupts of a device instance share a single MSI address.
+ # On X86 systems with an APIC, this MSI address is interpreted as CPU
+ # routing info by the APIC. For this reason, on these platforms, all
+ # interrupts for MSI devices must be moved to the same CPU at the same
+ # time.
+ #
+ # Since all interrupts will be on the same CPU on these platforms, all
+ # interrupts can be consolidated into one ivec entry. For such devices,
+ # num_ino will be > 1 to denote that a group move is needed.
+
+ # Loop thru all MSI devices on X86 pcplusmp systems.
+ # Nop on other systems.
+ foreach my $msidevkey (sort keys %msidevs) {
+
+ # Loop thru inos of the device, sorted by lowest value first
+ # For each cookie found for a device, incr num_ino for the
+ # lowest cookie and remove other cookies.
+
+ # Assumes PIL is the same for first and current cookies
+
+ my $first_ino = -1;
+ my $first_cookiep;
+ my $curr_cookiep;
+ foreach my $inokey (sort keys %{$msidevs{$msidevkey}}) {
+ $curr_cookiep = $msidevs{$msidevkey}{$inokey};
+ if ($first_ino == -1) {
+ $first_ino = $inokey;
+ $first_cookiep = $curr_cookiep;
+ } else {
+ $$first_cookiep->{num_ino}++;
+ $$first_cookiep->{time} +=
+ $$curr_cookiep->{time};
+ if ($$curr_cookiep->{crtime} >
+ $$first_cookiep->{crtime}) {
+ $$first_cookiep->{crtime} =
+ $$curr_cookiep->{crtime};
+ }
+ # Invalidate this cookie, less complicated and
+ # more efficient than deleting it.
+ $$curr_cookiep->{num_ino} = 0;
+ }
+ }
+ }
+
+ # We define the timerange as the amount of time spent gathering the
+ # various kstats, divided by our sleeptime. If we take a lot of time
+ # to access the kstats, and then we create a delta comparing these
+ # kstats with a prior set of kstats, that delta will cover
+ # substaintially different amount of time depending upon which
+ # interrupt or CPU is being examined.
+ #
+ # By checking the timerange here, we guarantee that any deltas
+ # created from these kstats will contain self-consistent data,
+ # in that all CPUs and interrupts cover a similar span of time.
+ #
+ # $timerange_toohi is the upper bound. Any timerange above
+ # this is thrown out as garbage. If the stat is safely within this
+ # bound, we treat the stat as representing an instant in time, rather
+ # than the time range it actually spans. We arbitrarily choose minsnap
+ # as the snaptime of the stat.
+
+ $stat{snaptime} = $minsnap;
+ my $timerange = ($maxsnap - $minsnap) / $sleeptime;
+ return (0) if ($timerange > $timerange_toohi); # i.e. failure
+ return (\%stat);
+}
+
+#
+# dumpdelta takes a reference to our "delta" structure:
+# {"missing"} "1" if the delta's component stats had inconsistencies
+# {"minsnap"} time of the first kstat snaptime used in this delta
+# {"maxsnap"} time of the last kstat snaptime used in this delta
+# {"goodness"} cost function applied to this delta
+# {"avgintrload"} avg of interrupt load across cpus, as a percentage
+# {"avgintrnsec"} avg number of nsec spent in interrupts, per cpu
+# {<cpuid>} iterates over on-line cpus
+# ->{"intrs"} cpu's movable intr time (sum of "time" for each ivec)
+# ->{"tot"} CPU load from all sources in nsec
+# ->{"bigintr"} largest value of {ivecs}{<ivec#>}{time} from below
+# ->{"intrload"} intrs / tot
+# ->{"ivecs"}
+# ->{<ivec#>} iterates over ivecs for this cpu
+# ->{"time"} time used by this interrupt (in nsec)
+# ->{"pil"} pil level of this interrupt
+# ->{"ino"} interrupt number (or base vector if MSI group)
+# ->{"buspath"} filename of the directory of the device's bus
+# ->{"name"} device name
+# ->{"ihs"} number of different handlers sharing this ino
+# ->{"num_ino"} number of interrupt vectors in MSI group
+#
+# It prints out the delta structure in a nice, human readable display.
+#
+
+sub dumpdelta($)
+{
+ my ($delta) = @_;
+
+ # print global info
+
+ syslog('debug', "dumpdelta:");
+ syslog('debug', " RECONFIGURATION IN DELTA") if $delta->{missing} > 0;
+ syslog('debug', " avgintrload: %5.2f%% avgintrnsec: %d",
+ $delta->{avgintrload} * 100, $delta->{avgintrnsec});
+ syslog('debug', " goodness: %5.2f%%", $delta->{goodness} * 100)
+ if exists($delta->{goodness});
+
+ # iterate over cpus
+
+ while (my ($cpu, $cpst) = each %$delta) {
+ next if !ref($cpst); # skip non-cpuid entries
+ my $tot = $cpst->{tot};
+ syslog('debug', " cpu %3d intr %7.3f%% (bigintr %7.3f%%)",
+ $cpu, $cpst->{intrload}*100, $cpst->{bigintr}*100/$tot);
+ syslog('debug', " intrs %d, bigintr %d",
+ $cpst->{intrs}, $cpst->{bigintr});
+
+ # iterate over ivecs on this cpu
+
+ while (my ($ivec, $ivst) = each %{$cpst->{ivecs}}) {
+ syslog('debug', " %15s:\"%s\": %7.3f%% %d",
+ ($ivst->{ihs} > 1 ? "$ivst->{name}($ivst->{ihs})" :
+ $ivst->{name}), $ivec,
+ $ivst->{time}*100 / $tot, $ivst->{time});
+ }
+ }
+}
+
+#
+# generate_delta($stat, $newstat) takes two stat references, returned from
+# getstat(), and creates a %delta. %delta (not surprisingly) contains the
+# same basic info as stat and newstat, but with the timestamps as deltas
+# instead of absolute times. We return a reference to the delta.
+#
+
+sub generate_delta($$)
+{
+ my ($stat, $newstat) = @_;
+
+ my %delta = ();
+ my $intrload;
+ my $intrnsec;
+ my $cpus;
+
+ # Take the worstcase timerange
+ $delta{minsnap} = $stat->{snaptime};
+ $delta{maxsnap} = $newstat->{snaptime};
+ if (VERIFY($delta{maxsnap} > $delta{minsnap},
+ "generate_delta: stats aren't ascending")) {
+ $delta{missing} = 1;
+ return (\%delta);
+ }
+
+ # if there are a different number of cpus in the stats, set missing
+
+ $delta{missing} = (keys(%$stat) != keys(%$newstat));
+ if (VERIFY($delta{missing} == 0,
+ "generate_delta: number of CPUs changed")) {
+ return (\%delta);
+ }
+
+ # scan through every cpu in %newstat and compare against %stat
+
+ while (my ($cpu, $newcpst) = each %$newstat) {
+ next if !ref($newcpst); # skip non-cpuid fields
+
+ # If %stat is missing a cpu from %newstat, then it was just
+ # onlined. Mark missing.
+
+ if (VERIFY(exists $stat->{$cpu} &&
+ $stat->{$cpu}{crtime} == $newcpst->{crtime},
+ "generate_delta: cpu $cpu changed")) {
+ $delta{missing} = 1;
+ return (\%delta);
+ }
+ my $cpst = $stat->{$cpu};
+ $delta{$cpu}{tot} = $newcpst->{tot} - $cpst->{tot};
+ if (VERIFY($delta{$cpu}{tot} >= 0,
+ "generate_delta: deltas are not ascending?")) {
+ $delta{missing} = 1;
+ delete($delta{$cpu});
+ return (\%delta);
+ }
+ # Avoid remote chance of division by zero
+ $delta{$cpu}{tot} = 1 if $delta{$cpu}{tot} == 0;
+ $delta{$cpu}{intrs} = 0;
+ $delta{$cpu}{bigintr} = 0;
+
+ my %ivecs = ();
+ $delta{$cpu}{ivecs} = \%ivecs;
+
+ # if the number of ivecs differs, set missing
+
+ if (VERIFY(keys(%{$cpst->{ivecs}}) ==
+ keys(%{$newcpst->{ivecs}}),
+ "generate_delta: cpu $cpu has more/less".
+ " interrupts")) {
+ $delta{missing} = 1;
+ return (\%delta);
+ }
+
+ while (my ($inum, $newivec) = each %{$newcpst->{ivecs}}) {
+
+ # Unused cookie, corresponding to an MSI vector which
+ # is part of a group. The whole group is accounted for
+ # by a different cookie.
+ next if ($newivec->{num_ino} == 0);
+
+ # If this ivec doesn't exist in $stat, or if $stat
+ # shows a different crtime, set missing.
+ if (VERIFY(exists $cpst->{ivecs}{$inum} &&
+ $cpst->{ivecs}{$inum}{crtime} ==
+ $newivec->{crtime},
+ "generate_delta: cpu $cpu inum $inum".
+ " has changed")) {
+ $delta{missing} = 1;
+ return (\%delta);
+ }
+ my $ivec = $cpst->{ivecs}{$inum};
+
+ # Create $delta{$cpu}{ivecs}{$inum}.
+
+ my %dltivec = ();
+ $delta{$cpu}{ivecs}{$inum} = \%dltivec;
+
+ # calculate time used by this interrupt
+
+ my $time = $newivec->{time} - $ivec->{time};
+ if (VERIFY($time >= 0,
+ "generate_delta: ivec went backwards?")) {
+ $delta{missing} = 1;
+ delete($delta{$cpu}{ivecs}{$inum});
+ return (\%delta);
+ }
+ $delta{$cpu}{intrs} += $time;
+ $dltivec{time} = $time;
+ if ($time > $delta{$cpu}{bigintr}) {
+ $delta{$cpu}{bigintr} = $time;
+ }
+
+ # Transfer over basic info about the kstat. We
+ # don't have to worry about discrepancies between
+ # ivec and newivec because we verified that both
+ # have the same crtime.
+
+ $dltivec{pil} = $newivec->{pil};
+ $dltivec{ino} = $newivec->{ino};
+ $dltivec{buspath} = $newivec->{buspath};
+ $dltivec{name} = $newivec->{name};
+ $dltivec{ihs} = $newivec->{ihs};
+ $dltivec{num_ino} = $newivec->{num_ino};
+ }
+ if ($delta{$cpu}{tot} < $delta{$cpu}{intrs}) {
+ # Ewww! Hopefully just a rounding error.
+ # Make something up.
+ $delta{$cpu}{tot} = $delta{$cpu}{intrs};
+ }
+ $delta{$cpu}{intrload} =
+ $delta{$cpu}{intrs} / $delta{$cpu}{tot};
+ $intrload += $delta{$cpu}{intrload};
+ $intrnsec += $delta{$cpu}{intrs};
+ $cpus++;
+ }
+ if ($cpus > 0) {
+ $delta{avgintrload} = $intrload / $cpus;
+ $delta{avgintrnsec} = $intrnsec / $cpus;
+ } else {
+ $delta{avgintrload} = 0;
+ $delta{avgintrnsec} = 0;
+ }
+ return (\%delta);
+}
+
+
+# compress_delta takes a list of deltas, and returns a single new delta
+# which represents the combined information from all the deltas. The deltas
+# provided are assumed to be sequential in time. The resulting compressed
+# delta looks just like any other delta. This new delta is also more accurate
+# since its statistics are averaged over a longer period than any of the
+# original deltas.
+
+sub compress_deltas ($)
+{
+ my ($deltas) = @_;
+
+ my %newdelta = ();
+ my ($intrs, $tot);
+ my $cpus = 0;
+ my ($high_intrload) = 0;
+
+ if (VERIFY($#$deltas != -1,
+ "compress_deltas: list of delta is empty?")) {
+ return (0);
+ }
+ $newdelta{minsnap} = $deltas->[0]{minsnap};
+ $newdelta{maxsnap} = $deltas->[$#$deltas]{maxsnap};
+ $newdelta{missing} = 0;
+
+ foreach my $delta (@$deltas) {
+ if (VERIFY($delta->{missing} == 0,
+ "compressing bad deltas?")) {
+ return (0);
+ }
+ while (my ($cpuid, $cpu) = each %$delta) {
+ next if !ref($cpu);
+
+ $intrs += $cpu->{intrs};
+ $tot += $cpu->{tot};
+ $newdelta{$cpuid}{intrs} += $cpu->{intrs};
+ $newdelta{$cpuid}{tot} += $cpu->{tot};
+ if (!exists $newdelta{$cpuid}{ivecs}) {
+ my %ivecs = ();
+ $newdelta{$cpuid}{ivecs} = \%ivecs;
+ }
+ while (my ($inum, $ivec) = each %{$cpu->{ivecs}}) {
+ my $newivecs = $newdelta{$cpuid}{ivecs};
+ $newivecs->{$inum}{time} += $ivec->{time};
+ $newivecs->{$inum}{pil} = $ivec->{pil};
+ $newivecs->{$inum}{ino} = $ivec->{ino};
+ $newivecs->{$inum}{buspath} = $ivec->{buspath};
+ $newivecs->{$inum}{name} = $ivec->{name};
+ $newivecs->{$inum}{ihs} = $ivec->{ihs};
+ $newivecs->{$inum}{num_ino} = $ivec->{num_ino};
+ }
+ }
+ }
+ foreach my $cpu (values(%newdelta)) {
+ next if !ref($cpu); # ignore non-cpu fields
+ $cpus++;
+
+ my $bigintr = 0;
+ foreach my $ivec (values(%{$cpu->{ivecs}})) {
+ if ($ivec->{time} > $bigintr) {
+ $bigintr = $ivec->{time};
+ }
+ }
+ $cpu->{bigintr} = $bigintr;
+ $cpu->{intrload} = $cpu->{intrs} / $cpu->{tot};
+ if ($high_intrload < $cpu->{intrload}) {
+ $high_intrload = $cpu->{intrload};
+ }
+ $cpu->{tot} = 1 if $cpu->{tot} <= 0;
+ }
+ if ($cpus == 0) {
+ $newdelta{avgintrnsec} = 0;
+ $newdelta{avgintrload} = 0;
+ } else {
+ $newdelta{avgintrnsec} = $intrs / $cpus;
+ $newdelta{avgintrload} = $intrs / $tot;
+ }
+ $sleeptime = ($high_intrload < $idle_intrload) ? $idle_sleeptime :
+ $normal_sleeptime;
+ return (\%newdelta);
+}
+
+
+
+
+
+# What follow are the core functions responsible for examining the deltas
+# generated above and deciding what to do about them.
+#
+# goodness() and its helper goodness_cpu() return a heuristic which describe
+# how good (or bad) the current interrupt balance is. The value returned will
+# be between 0 and 1, with 0 representing maximum goodness, and 1 representing
+# maximum badness.
+#
+# imbalanced() compares a current and historical value of goodness, and
+# determines if there has been enough change to warrant evaluating a
+# reconfiguration of the interrupts
+#
+# do_reconfig(), and its helpers, do_reconfig_cpu(), do_reconfig_cpu2cpu(),
+# find_goal(), do_find_goal(), and move_intr(), are responsible for examining
+# a delta and determining the best possible assignment of interrupts to CPUs.
+#
+# It is important that do_reconfig() be in alignment with goodness(). If
+# do_reconfig were to generate a new interrupt distribution that worsened
+# goodness, we could get into a pathological loop with intrd fighting itself,
+# constantly deciding that things are imbalanced, and then changing things
+# only to make them worse.
+
+
+
+# any goodness over $goodness_unsafe_load is considered really bad
+# goodness must drop by at least $goodness_mindelta for a reconfig
+
+my $goodness_unsafe_load = .9;
+my $goodness_mindelta = .1;
+
+# goodness(%delta) examines a delta and return its "goodness". goodness will
+# be between 0 (best) and 1 (major bad). goodness is determined by evaluating
+# the goodness of each individual cpu, and returning the worst case. This
+# helps on systems with many CPUs, where otherwise a single pathological CPU
+# might otherwise be ignored because the average was OK.
+#
+# To calculate the goodness of an individual CPU, we start by looking at its
+# load due to interrupts. If the load is above a certain high threshold and
+# there is more than one interrupt assigned to this CPU, we set goodness
+# to worst-case. If the load is below the average interrupt load of all CPUs,
+# then we return best-case, since what's to complain about?
+#
+# Otherwise we look at how much the load is above the average, and return
+# that as the goodness, with one caveat: we never return more than the CPU's
+# interrupt load ignoring its largest single interrupt source. This is
+# because a CPU with one high-load interrupt, and no other interrupts, is
+# perfectly balanced. Nothing can be done to improve the situation, and thus
+# it is perfectly balanced even if the interrupt's load is 100%.
+
+sub goodness($)
+{
+ my ($delta) = @_;
+
+ return (1) if $delta->{missing} > 0;
+
+ my $high_goodness = 0;
+ my $goodness;
+
+ foreach my $cpu (values(%$delta)) {
+ next if !ref($cpu); # skip non-cpuid fields
+
+ $goodness = goodness_cpu($cpu, $delta->{avgintrload});
+ if (VERIFY($goodness >= 0 && $goodness <= 1,
+ "goodness: cpu goodness out of range?")) {
+ dumpdelta($delta);
+ return (1);
+ }
+ if ($goodness == 1) {
+ return (1); # worst case, no need to continue
+ }
+ if ($goodness > $high_goodness) {
+ $high_goodness = $goodness;
+ }
+ }
+ return ($high_goodness);
+}
+
+sub goodness_cpu($$) # private function
+{
+ my ($cpu, $avgintrload) = @_;
+
+ my $goodness;
+ my $load = $cpu->{intrs} / $cpu->{tot};
+
+ return (0) if ($load < $avgintrload); # low loads are perfectly good
+
+ # Calculate $load_no_bigintr, which represents the load
+ # due to interrupts, excluding the one biggest interrupt.
+ # This is the most gain we can get on this CPU from
+ # offloading interrupts.
+
+ my $load_no_bigintr = ($cpu->{intrs} - $cpu->{bigintr}) / $cpu->{tot};
+
+ # A major imbalance is indicated if a CPU is saturated
+ # with interrupt handling, and it has more than one
+ # source of interrupts. Those other interrupts could be
+ # starved if of a lower pil. Return a goodness of 1,
+ # which is the worst possible return value,
+ # which will effectively contaminate this entire delta.
+
+ my $cnt = keys(%{$cpu->{ivecs}});
+
+ if ($load > $goodness_unsafe_load && $cnt > 1) {
+ return (1);
+ }
+ $goodness = $load - $avgintrload;
+ if ($goodness > $load_no_bigintr) {
+ $goodness = $load_no_bigintr;
+ }
+ return ($goodness);
+}
+
+
+# imbalanced() is used by the main routine to determine if the goodness
+# has shifted far enough from our last baseline to warrant a reassignment
+# of interrupts. A very high goodness indicates that a CPU is way out of
+# whack. If the goodness has varied too much since the baseline, then
+# perhaps a reconfiguration is worth considering.
+
+sub imbalanced ($$)
+{
+ my ($goodness, $baseline) = @_;
+
+ # Return 1 if we are pathological, or creeping away from the baseline
+
+ return (1) if $goodness > .50;
+ return (1) if abs($goodness - $baseline) > $goodness_mindelta;
+ return (0);
+}
+
+# do_reconfig(), do_reconfig_cpu(), and do_reconfig_cpu2cpu(), are the
+# decision-making functions responsible for generating a new interrupt
+# distribution. They are designed with the definition of goodness() in
+# mind, i.e. they use the same definition of "good distribution" as does
+# goodness().
+#
+# do_reconfig() is responsible for deciding whether a redistribution is
+# actually warranted. If the goodness is already pretty good, it doesn't
+# waste the CPU time to generate a new distribution. If it
+# calculates a new distribution and finds that it is not sufficiently
+# improved from the prior distirbution, it will not do the redistribution,
+# mainly to avoid the disruption to system performance caused by
+# rejuggling interrupts.
+#
+# Its main loop works by going through a list of cpus sorted from
+# highest to lowest interrupt load. It removes the highest-load cpus
+# one at a time and hands them off to do_reconfig_cpu(). This function
+# then re-sorts the remaining CPUs from lowest to highest interrupt load,
+# and one at a time attempts to rejuggle interrupts between the original
+# high-load CPU and the low-load CPU. Rejuggling on a high-load CPU is
+# considered finished as soon as its interrupt load is within
+# $goodness_mindelta of the average interrupt load. Such a CPU will have
+# a goodness of below the $goodness_mindelta threshold.
+
+#
+# move_intr(\%delta, $inum, $oldcpu, $newcpu)
+# used by reconfiguration code to move an interrupt between cpus within
+# a delta. This manipulates data structures, and does not actually move
+# the interrupt on the running system.
+#
+sub move_intr($$$$) # private function
+{
+ my ($delta, $inum, $oldcpuid, $newcpuid) = @_;
+
+ my $ivec = $delta->{$oldcpuid}{ivecs}{$inum};
+
+ # Remove ivec from old cpu
+
+ my $oldcpu = $delta->{$oldcpuid};
+ $oldcpu->{intrs} -= $ivec->{time};
+ $oldcpu->{intrload} = $oldcpu->{intrs} / $oldcpu->{tot};
+ delete($oldcpu->{ivecs}{$inum});
+
+ VERIFY($oldcpu->{intrs} >= 0, "move_intr: intr's time > total time?");
+ VERIFY($ivec->{time} <= $oldcpu->{bigintr},
+ "move_intr: intr's time > bigintr?");
+
+ if ($ivec->{time} >= $oldcpu->{bigintr}) {
+ my $bigtime = 0;
+
+ foreach my $ivec (values(%{$oldcpu->{ivecs}})) {
+ $bigtime = $ivec->{time} if $ivec->{time} > $bigtime;
+ }
+ $oldcpu->{bigintr} = $bigtime;
+ }
+
+ # Add ivec onto new cpu
+
+ my $newcpu = $delta->{$newcpuid};
+
+ $ivec->{nowcpu} = $newcpuid;
+ $newcpu->{intrs} += $ivec->{time};
+ $newcpu->{intrload} = $newcpu->{intrs} / $newcpu->{tot};
+ $newcpu->{ivecs}{$inum} = $ivec;
+
+ $newcpu->{bigintr} = $ivec->{time}
+ if $ivec->{time} > $newcpu->{bigintr};
+}
+
+sub move_intr_check($$$) # private function
+{
+ my ($delta, $oldcpuid, $newcpuid) = @_;
+
+ VERIFY($delta->{$oldcpuid}{tot} >= $delta->{$oldcpuid}{intrs},
+ "Moved interrupts left 100+%% load on src cpu");
+ VERIFY($delta->{$newcpuid}{tot} >= $delta->{$newcpuid}{intrs},
+ "Moved interrupts left 100+%% load on tgt cpu");
+}
+
+sub ivecs_to_string(@) # private function
+{
+ my $str = "";
+ foreach my $ivec (@_) {
+ $str = "$str $ivec->{inum}";
+ }
+ return ($str);
+}
+
+
+sub do_reconfig($)
+{
+ my ($delta) = @_;
+
+ my $goodness = $delta->{goodness};
+
+ # We can't improve goodness to better than 0. We should stop here
+ # if, even if we achieve a goodness of 0, the improvement is still
+ # too small to merit the action.
+
+ if ($goodness - 0 < $goodness_mindelta) {
+ syslog('debug', "goodness good enough, don't reconfig");
+ return (0);
+ }
+
+ syslog('notice', "Optimizing interrupt assignments");
+
+ if (VERIFY ($delta->{missing} == 0, "RECONFIG Aborted: should not ".
+ "have a delta with missing")) {
+ return (-1);
+ }
+
+ # Make a list of all cpuids, and also add some extra information
+ # to the ivec structures.
+
+ my @cpusortlist = ();
+
+ while (my ($cpuid, $cpu) = each %$delta) {
+ next if !ref($cpu); # skip non-cpu entries
+
+ push(@cpusortlist, $cpuid);
+ while (my ($inum, $ivec) = each %{$cpu->{ivecs}}) {
+ $ivec->{origcpu} = $cpuid;
+ $ivec->{nowcpu} = $cpuid;
+ $ivec->{inum} = $inum;
+ }
+ }
+
+ # Sort the list of CPUs from highest to lowest interrupt load.
+ # Remove the top CPU from that list and attempt to redistribute
+ # its interrupts. If the CPU has a goodness below a threshold,
+ # just ignore the CPU and move to the next one. If the CPU's
+ # load falls below the average load plus that same threshold,
+ # then there are no CPUs left worth reconfiguring, and we're done.
+
+ while (@cpusortlist) {
+ # Re-sort cpusortlist each time, since do_reconfig_cpu can
+ # move interrupts around.
+
+ @cpusortlist =
+ sort({$delta->{$b}{intrload} <=> $delta->{$a}{intrload}}
+ @cpusortlist);
+
+ my $cpu = shift(@cpusortlist);
+ if (($delta->{$cpu}{intrload} <= $goodness_unsafe_load) &&
+ ($delta->{$cpu}{intrload} <=
+ $delta->{avgintrload} + $goodness_mindelta)) {
+ syslog('debug', "finished reconfig: cpu $cpu load ".
+ "$delta->{$cpu}{intrload} avgload ".
+ "$delta->{avgintrload}");
+ last;
+ }
+ if (goodness_cpu($delta->{$cpu}, $delta->{avgintrload}) <
+ $goodness_mindelta) {
+ next;
+ }
+ do_reconfig_cpu($delta, \@cpusortlist, $cpu);
+ }
+
+ # How good a job did we do? If the improvement was minimal, and
+ # our goodness wasn't pathological (and thus needing any help it
+ # can get), then don't bother moving the interrupts.
+
+ my $newgoodness = goodness($delta);
+ VERIFY($newgoodness <= $goodness,
+ "reconfig: result has worse goodness?");
+
+ if (($goodness != 1 || $newgoodness == 1) &&
+ $goodness - $newgoodness < $goodness_mindelta) {
+ syslog('debug', "goodness already near optimum, ".
+ "don't reconfig");
+ return (0);
+ }
+ syslog('debug', "goodness %5.2f%% --> %5.2f%%", $goodness*100,
+ $newgoodness*100);
+
+ # Time to move those interrupts!
+
+ my $ret = 1;
+ my $warned = 0;
+ while (my ($cpuid, $cpu) = each %$delta) {
+ next if $cpuid =~ /\D/;
+ while (my ($inum, $ivec) = each %{$cpu->{ivecs}}) {
+ next if ($ivec->{origcpu} == $cpuid);
+
+ if (!intrmove($ivec->{buspath}, $ivec->{ino},
+ $cpuid, $ivec->{num_ino})) {
+ syslog('warning', "Unable to move interrupts")
+ if $warned++ == 0;
+ syslog('debug', "Unable to move buspath ".
+ "$ivec->{buspath} ino $ivec->{ino} to ".
+ "cpu $cpuid");
+ $ret = -1;
+ }
+ }
+ }
+
+ syslog('notice', "Interrupt assignments optimized");
+ return ($ret);
+}
+
+sub do_reconfig_cpu($$$) # private function
+{
+ my ($delta, $cpusortlist, $oldcpuid) = @_;
+
+ # We have been asked to rejuggle interrupts between $oldcpuid and
+ # other CPUs found on $cpusortlist so as to improve the load on
+ # $oldcpuid. We reverse $cpusortlist to get our own copy of the
+ # list, sorted from lowest to highest interrupt load. One at a
+ # time, shift a CPU off of this list of CPUs, and attempt to
+ # rejuggle interrupts between the two CPUs. Don't do this if the
+ # other CPU has a higher load than oldcpuid. We're done rejuggling
+ # once $oldcpuid's goodness falls below a threshold.
+
+ syslog('debug', "reconfiguring $oldcpuid");
+
+ my $cpu = $delta->{$oldcpuid};
+ my $avgintrload = $delta->{avgintrload};
+
+ my @cputargetlist = reverse(@$cpusortlist); # make a copy of the list
+ while ($#cputargetlist != -1) {
+ last if goodness_cpu($cpu, $avgintrload) < $goodness_mindelta;
+
+ my $tgtcpuid = shift(@cputargetlist);
+ my $tgt = $delta->{$tgtcpuid};
+ my $load = $cpu->{intrload};
+ my $tgtload = $tgt->{intrload};
+ last if $tgtload > $load;
+ do_reconfig_cpu2cpu($delta, $oldcpuid, $tgtcpuid, $load);
+ }
+}
+
+sub do_reconfig_cpu2cpu($$$$) # private function
+{
+ my ($delta, $srccpuid, $tgtcpuid, $srcload) = @_;
+
+ # We've been asked to consider interrupt juggling between srccpuid
+ # (with a high interrupt load) and tgtcpuid (with a lower interrupt
+ # load). First, make a single list with all of the ivecs from both
+ # CPUs, and sort the list from highest to lowest load.
+
+ syslog('debug', "exchanging intrs between $srccpuid and $tgtcpuid");
+
+ # Gather together all the ivecs and sort by load
+
+ my @ivecs = (values(%{$delta->{$srccpuid}{ivecs}}),
+ values(%{$delta->{$tgtcpuid}{ivecs}}));
+ return if $#ivecs == -1;
+
+ @ivecs = sort({$b->{time} <=> $a->{time}} @ivecs);
+
+ # Our "goal" load for srccpuid is the average load across all CPUs.
+ # find_goal() will find determine the optimum selection of the
+ # available interrupts which comes closest to this goal without
+ # falling below the goal.
+
+ my $goal = $delta->{avgintrnsec};
+
+ # We know that the interrupt load on tgtcpuid is less than that on
+ # srccpuid, but its load could still be above avgintrnsec. Don't
+ # choose a goal which would bring srccpuid below the load on tgtcpuid.
+
+ my $avgnsec =
+ ($delta->{$srccpuid}{intrs} + $delta->{$tgtcpuid}{intrs}) / 2;
+ if ($goal < $avgnsec) {
+ $goal = $avgnsec;
+ }
+
+ # If the largest of the interrupts is on srccpuid, leave it there.
+ # This can help minimize the disruption caused by moving interrupts.
+
+ if ($ivecs[0]->{origcpu} == $srccpuid) {
+ syslog('debug', "Keeping $ivecs[0]->{inum} on $srccpuid");
+ $goal -= $ivecs[0]->{time};
+ shift(@ivecs);
+ }
+
+ syslog('debug', "GOAL: inums should total $goal");
+ find_goal(\@ivecs, $goal);
+
+ # find_goal() returned its results to us by setting $ivec->{goal} if
+ # the ivec should be on srccpuid, or clearing it for tgtcpuid.
+ # Call move_intr() to update our $delta with the new results.
+
+ foreach my $ivec (@ivecs) {
+ syslog('debug', "ivec $ivec->{inum} goal $ivec->{goal}");
+ VERIFY($ivec->{nowcpu} == $srccpuid ||
+ $ivec->{nowcpu} == $tgtcpuid, "cpu2cpu found an ".
+ "interrupt not currently on src or tgt cpu");
+
+ if ($ivec->{goal} && $ivec->{nowcpu} != $srccpuid) {
+ move_intr($delta, $ivec->{inum}, $ivec->{nowcpu},
+ $srccpuid);
+ } elsif ($ivec->{goal} == 0 && $ivec->{nowcpu} != $tgtcpuid) {
+ move_intr($delta, $ivec->{inum}, $ivec->{nowcpu},
+ $tgtcpuid);
+ }
+ }
+ move_intr_check($delta, $srccpuid, $tgtcpuid); # asserts
+
+ my $newload = $delta->{$srccpuid}{intrs} / $delta->{$srccpuid}{tot};
+ VERIFY($newload <= $srcload && $newload > $delta->{avgintrload},
+ "cpu2cpu: new load didn't end up in expected range");
+}
+
+
+# find_goal() and its helper do_find_goal() are used to find the best
+# combination of interrupts in order to generate a load that is as close
+# as possible to a goal load without falling below that goal. Before returning
+# to its caller, find_goal() sets a new value in the hash of each interrupt,
+# {goal}, which if set signifies that this interrupt is one of the interrupts
+# identified as part of the set of interrupts which best meet the goal.
+#
+# The arguments to find_goal are a list of ivecs (hash references), sorted
+# by descending {time}, and the goal load. The goal is relative to {time}.
+# The best fit is determined by performing a depth-first search. do_find_goal
+# is the recursive subroutine which carries out the search.
+#
+# It is passed an index as an argument, originally 0. On a given invocation,
+# it is only to consider interrupts in the ivecs array starting at that index.
+# It then considers two possibilities:
+# 1) What is the best goal-fit if I include ivecs[index]?
+# 2) What is the best goal-fit if I exclude ivecs[index]?
+# To determine case 1, it subtracts the load of ivecs[index] from the goal,
+# and calls itself recursively with that new goal and index++.
+# To determine case 2, it calls itself recursively with the same goal and
+# index++.
+#
+# It then compares the two results, decide which one best meets the goals,
+# and returns the result. The return value is the best-fit's interrupt load,
+# followed by a list of all the interrupts which make up that best-fit.
+#
+# As an optimization, a second array loads[] is created which mirrors ivecs[].
+# loads[i] will equal the total loads of all ivecs[i..$#ivecs]. This is used
+# by do_find_goal to avoid recursing all the way to the end of the ivecs
+# array if including all remaining interrupts will still leave the best-fit
+# at below goal load. If so, it then includes all remaining interrupts on
+# the goal list and returns.
+#
+sub find_goal($$) # private function
+{
+ my ($ivecs, $goal) = @_;
+
+ my @goals;
+ my $load;
+ my $ivec;
+
+ if ($goal <= 0) {
+ @goals = (); # the empty set will best meet the goal
+ } else {
+ syslog('debug', "finding goal from intrs %s",
+ ivecs_to_string(@$ivecs));
+
+ # Generate @loads array
+
+ my $tot = 0;
+ foreach $ivec (@$ivecs) {
+ $tot += $ivec->{time};
+ }
+ my @loads = ();
+ foreach $ivec (@$ivecs) {
+ push(@loads, $tot);
+ $tot -= $ivec->{time};
+ }
+ ($load, @goals) = do_find_goal($ivecs, \@loads, $goal, 0);
+ VERIFY($load >= $goal, "find_goal didn't meet goals");
+ }
+ syslog('debug', "goals found: %s", ivecs_to_string(@goals));
+
+ # Set or clear $ivec->{goal} for each ivec, based on returned @goals
+
+ foreach $ivec (@$ivecs) {
+ if ($#goals > -1 && $ivec == $goals[0]) {
+ syslog('debug', "inum $ivec->{inum} on source cpu");
+ $ivec->{goal} = 1;
+ shift(@goals);
+ } else {
+ syslog('debug', "inum $ivec->{inum} on target cpu");
+ $ivec->{goal} = 0;
+ }
+ }
+}
+
+
+sub do_find_goal($$$$) # private function
+{
+ my ($ivecs, $loads, $goal, $idx) = @_;
+
+ if ($idx > $#{$ivecs}) {
+ return (0);
+ }
+ syslog('debug', "$idx: finding goal $goal inum $ivecs->[$idx]{inum}");
+
+ my $load = $ivecs->[$idx]{time};
+ my @goals_with = ();
+ my @goals_without = ();
+ my ($with, $without);
+
+ # If we include all remaining items and we're still below goal,
+ # stop here. We can just return a result that includes $idx and all
+ # subsequent ivecs. Since this will still be below goal, there's
+ # nothing better to be done.
+
+ if ($loads->[$idx] <= $goal) {
+ syslog('debug',
+ "$idx: including all remaining intrs %s with load %d",
+ ivecs_to_string(@$ivecs[$idx .. $#{$ivecs}]),
+ $loads->[$idx]);
+ return ($loads->[$idx], @$ivecs[$idx .. $#{$ivecs}]);
+ }
+
+ # Evaluate the "with" option, i.e. the best matching goal which
+ # includes $ivecs->[$idx]. If idx's load is more than our goal load,
+ # stop here. Once we're above the goal, there is no need to consider
+ # further interrupts since they'll only take us further from the goal.
+
+ if ($goal <= $load) {
+ $with = $load; # stop here
+ } else {
+ ($with, @goals_with) =
+ do_find_goal($ivecs, $loads, $goal - $load, $idx + 1);
+ $with += $load;
+ }
+ syslog('debug', "$idx: with-load $with intrs %s",
+ ivecs_to_string($ivecs->[$idx], @goals_with));
+
+ # Evaluate the "without" option, i.e. the best matching goal which
+ # excludes $ivecs->[$idx].
+
+ ($without, @goals_without) =
+ &do_find_goal($ivecs, $loads, $goal, $idx + 1);
+ syslog('debug', "$idx: without-load $without intrs %s",
+ ivecs_to_string(@goals_without));
+
+ # We now have our "with" and "without" options, and we choose which
+ # best fits the goal. If one is greater than goal and the other is
+ # below goal, we choose the one that is greater. If they are both
+ # below goal, then we choose the one that is greater. If they are
+ # both above goal, then we choose the smaller.
+
+ my $which; # 0 == with, 1 == without
+ if ($with >= $goal && $without < $goal) {
+ $which = 0;
+ } elsif ($with < $goal && $without >= $goal) {
+ $which = 1;
+ } elsif ($with >= $goal && $without >= $goal) {
+ $which = ($without < $with);
+ } else {
+ $which = ($without > $with);
+ }
+
+ # Return the load of our best case scenario, followed by all the ivecs
+ # which compose that goal.
+
+ if ($which == 1) { # without
+ syslog('debug', "$idx: going without");
+ return ($without, @goals_without);
+ } else {
+ syslog('debug', "$idx: going with");
+ return ($with, $ivecs->[$idx], @goals_with);
+ }
+ # Not reached
+}
+
+
+
+
+syslog('debug', "intrd is starting".($debug ? " (debug)" : ""));
+
+my @deltas = ();
+my $deltas_tottime = 0; # sum of maxsnap-minsnap across @deltas
+my $avggoodness;
+my $baseline_goodness = 0;
+my $compdelta;
+
+my $do_reconfig;
+
+# temp variables
+my $goodness;
+my $deltatime;
+my $olddelta;
+my $olddeltatime;
+my $delta;
+my $newstat;
+my $below_statslen;
+my $newtime;
+my $ret;
+
+
+my $gotsig = 0;
+$SIG{INT} = sub { $gotsig = 1; }; # don't die in the middle of retargeting
+$SIG{HUP} = $SIG{INT};
+$SIG{TERM} = $SIG{INT};
+
+my $ks;
+if ($using_scengen == 0) {
+ $ks = Sun::Solaris::Kstat->new();
+} else {
+ $ks = myks_update(); # supplied by the simulator
+}
+
+# If no pci_intrs kstats were found, we need to exit, but we can't because
+# SMF will restart us and/or report an error to the administrator. But
+# there's nothing an administrator can do. So print out a message for SMF
+# logs and silently pause forever.
+
+if (!exists($ks->{pci_intrs})) {
+ print STDERR "$cmdname: no interrupts were found; ".
+ "your PCI bus may not yet be supported\n";
+ pause() while $gotsig == 0;
+ exit 0;
+}
+
+# See if this is a system with a pcplusmp APIC.
+# Such systems will get special handling.
+# Assume that if one bus has a pcplusmp APIC that they all do.
+
+# Get a list of pci_intrs kstats.
+my @elem = values(%{$ks->{pci_intrs}});
+my $elem0 = $elem[0];
+my $elemval = (values(%$elem0))[0];
+
+# Use its buspath to query the system. It is assumed that either all or none
+# of the busses on a system are hosted by the pcplusmp APIC.
+my $pcplusmp_sys = is_pcplusmp($elemval->{buspath});
+
+my $stat = getstat($ks, $pcplusmp_sys);
+
+for (;;) {
+ sub clear_deltas {
+ @deltas = ();
+ $deltas_tottime = 0;
+ $stat = 0; # prevent next gen_delta() from setting {missing}
+ }
+
+ # 1. Sleep, update the kstats, and save the new stats in $newstat.
+
+ exit 0 if $gotsig; # if we got ^C / SIGTERM, exit
+ if ($using_scengen == 0) {
+ sleep($sleeptime);
+ exit 0 if $gotsig; # if we got ^C / SIGTERM, exit
+ $ks->update();
+ } else {
+ $ks = myks_update();
+ }
+ $newstat = getstat($ks, $pcplusmp_sys);
+
+ # $stat or $newstat could be zero if they're uninitialized, or if
+ # getstat() failed. If $stat is zero, move $newstat to $stat, sleep
+ # and try again. If $newstat is zero, then we also sleep and try
+ # again, hoping the problem will clear up.
+
+ next if (!ref $newstat);
+ if (!ref $stat) {
+ $stat = $newstat;
+ next;
+ }
+
+ # 2. Compare $newstat with the prior set of values, result in %$delta.
+
+ $delta = generate_delta($stat, $newstat);
+ dumpdelta($delta) if $debug; # Dump most recent stats to stdout.
+ $stat = $newstat; # The new stats now become the old stats.
+
+
+ # 3. If $delta->{missing}, then there has been a reconfiguration of
+ # either cpus or interrupts (probably both). We need to toss out our
+ # old set of statistics and start from scratch.
+ #
+ # Also, if the delta covers a very long range of time, then we've
+ # been experiencing a system overload that has resulted in intrd
+ # not being allowed to run effectively for a while now. As above,
+ # toss our old statistics and start from scratch.
+
+ $deltatime = $delta->{maxsnap} - $delta->{minsnap};
+ if ($delta->{missing} > 0 || $deltatime > $statslen) {
+ clear_deltas();
+ syslog('debug', "evaluating interrupt assignments");
+ next;
+ }
+
+
+ # 4. Incorporate new delta into the list of deltas, and associated
+ # statistics. If we've just now received $statslen deltas, then it's
+ # time to evaluate a reconfiguration.
+
+ $below_statslen = ($deltas_tottime < $statslen);
+ $deltas_tottime += $deltatime;
+ $do_reconfig = ($below_statslen && $deltas_tottime >= $statslen);
+ push(@deltas, $delta);
+
+ # 5. Remove old deltas if total time is more than $statslen. We use
+ # @deltas as a moving average of the last $statslen seconds. Shift
+ # off the olders deltas, but only if that doesn't cause us to fall
+ # below $statslen seconds.
+
+ while (@deltas > 1) {
+ $olddelta = $deltas[0];
+ $olddeltatime = $olddelta->{maxsnap} - $olddelta->{minsnap};
+ $newtime = $deltas_tottime - $olddeltatime;
+ last if ($newtime < $statslen);
+
+ shift(@deltas);
+ $deltas_tottime = $newtime;
+ }
+
+ # 6. The brains of the operation are here. First, check if we're
+ # imbalanced, and if so set $do_reconfig. If $do_reconfig is set,
+ # either because of imbalance or above in step 4, we evaluate a
+ # new configuration.
+ #
+ # First, take @deltas and generate a single "compressed" delta
+ # which summarizes them all. Pass that to do_reconfig and see
+ # what it does with it:
+ #
+ # $ret == -1 : failure
+ # $ret == 0 : current config is optimal (or close enough)
+ # $ret == 1 : reconfiguration has occurred
+ #
+ # If $ret is -1 or 1, dump all our deltas and start from scratch.
+ # Step 4 above will set do_reconfig soon thereafter.
+ #
+ # If $ret is 0, then nothing has happened because we're already
+ # good enough. Set baseline_goodness to current goodness.
+
+ $compdelta = compress_deltas(\@deltas);
+ if (VERIFY(ref($compdelta) eq "HASH", "couldn't compress deltas")) {
+ clear_deltas();
+ next;
+ }
+ $compdelta->{goodness} = goodness($compdelta);
+ dumpdelta($compdelta) if $debug;
+
+ $goodness = $compdelta->{goodness};
+ syslog('debug', "GOODNESS: %5.2f%%", $goodness * 100);
+
+ if ($deltas_tottime >= $statslen &&
+ imbalanced($goodness, $baseline_goodness)) {
+ $do_reconfig = 1;
+ }
+
+ if ($do_reconfig) {
+ $ret = do_reconfig($compdelta);
+
+ if ($ret != 0) {
+ clear_deltas();
+ syslog('debug', "do_reconfig FAILED!") if $ret == -1;
+ } else {
+ syslog('debug', "setting new baseline of $goodness");
+ $baseline_goodness = $goodness;
+ }
+ }
+ syslog('debug', "---------------------------------------");
+}