diff -r 000000000000 -r 68f95e015346 usr/src/uts/common/fs/nfs/nfs_log.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/fs/nfs/nfs_log.c Tue Jun 14 00:00:00 2005 -0700 @@ -0,0 +1,1912 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_RECORDS_TO_WRITE 256 +#define NUM_BYTES_TO_WRITE 65536 + +extern krwlock_t exported_lock; + +static int nfslog_num_records_to_write = NUM_RECORDS_TO_WRITE; +static int nfslog_num_bytes_to_write = NUM_BYTES_TO_WRITE; + +/* + * This struct is used to 'hide' the details of managing the log + * records internally to the logging code. Allocation routines + * are used to obtain pieces of memory for XDR encoding. This struct + * is a 'header' to those areas and a opaque cookie is used to pass + * this data structure between the allocating function and the put + * function. + */ +struct lr_alloc { + struct lr_alloc *next; /* links for write queuing */ + struct lr_alloc *prev; +#define LR_ALLOC_NOFREE 0x1 /* not present, call free */ + int lr_flags; + caddr_t log_record; /* address to XDR encoding */ + size_t size; /* final size of encoding */ + struct kmem_cache *alloc_cache; /* keep track of cache ptr */ + struct exportinfo *exi; /* who are we related to? */ + struct log_buffer *lb; +}; + +struct flush_thread_params { + struct nfsl_flush_args tp_args; + int tp_error; +}; + +static int log_file_create(caddr_t, struct log_file **); +static void log_file_rele(struct log_file *); +static struct log_buffer *log_buffer_create(caddr_t); +static void log_buffer_rele(struct log_buffer *); +static int nfslog_record_append2all(struct lr_alloc *); +static int nfslog_logbuffer_rename(struct log_buffer *); +static void nfslog_logfile_wait(struct log_file *); +static int nfslog_logfile_rename(char *, char *); +static void nfslog_do_flush(struct flush_thread_params *); +static void create_buffer_header(caddr_t *, size_t *, size_t *); + +static int nfslog_write_logrecords(struct log_file *, struct lr_alloc *, int); +static void nfslog_free_logrecords(struct lr_alloc *); +static int nfslog_records_flush_to_disk(struct log_buffer *); +static int nfslog_records_flush_to_disk_nolock(struct log_buffer *); + +/* + * Read/Write lock that protects 'nfslog_buffer_list'. + * This lock must be held when searching or modifying 'nfslog_buffer_list'. + */ +static krwlock_t nfslog_buffer_list_lock; + +/* + * The list of "log_buffer" structures. + */ +struct log_buffer *nfslog_buffer_list = NULL; + + +#define LOG_BUFFER_HOLD(lbp) { \ + mutex_enter(&(lbp)->lb_lock); \ + (lbp)->lb_refcnt++; \ + mutex_exit(&(lbp)->lb_lock); \ +} + +#define LOG_FILE_HOLD(lfp) { \ + mutex_enter(&(lfp)->lf_lock); \ + (lfp)->lf_refcnt++; \ + mutex_exit(&(lfp)->lf_lock); \ +} + +#define LOG_FILE_RELE(lfp) { \ + log_file_rele(lfp); \ +} + +/* + * These two macros are used to prep a logfile data structure and + * associated file for writing data. Note that the lf_lock is + * held as a result of the call to the first macro. This is used + * for serialization correctness between the logbuffer struct and + * the logfile struct. + */ +#define LOG_FILE_LOCK_TO_WRITE(lfp) { \ + mutex_enter(&(lfp)->lf_lock); \ + (lfp)->lf_refcnt++; \ + (lfp)->lf_writers++; \ +} + +#define LOG_FILE_UNLOCK_FROM_WRITE(lfp) { \ + (lfp)->lf_writers--; \ + if ((lfp)->lf_writers == 0 && ((lfp)->lf_flags & L_WAITING)) { \ + (lfp)->lf_flags &= ~L_WAITING; \ + cv_broadcast(&(lfp)->lf_cv_waiters); \ + } \ + mutex_exit(&(lfp)->lf_lock); \ + log_file_rele(lfp); \ +} + +int rfsl_log_buffer = 0; +static int rfsl_log_file = 0; + +/* This array is used for memory allocation of record encoding spaces */ +static struct { + int size; + struct kmem_cache *mem_cache; + char *cache_name; +} nfslog_mem_alloc[] = { +#define SMALL_INDX 0 + { NFSLOG_SMALL_RECORD_SIZE - sizeof (struct lr_alloc), + NULL, NFSLOG_SMALL_REC_NAME }, +#define MEDIUM_INDX 1 + { NFSLOG_MEDIUM_RECORD_SIZE - sizeof (struct lr_alloc), + NULL, NFSLOG_MEDIUM_REC_NAME }, +#define LARGE_INDX 2 + { NFSLOG_LARGE_RECORD_SIZE - sizeof (struct lr_alloc), + NULL, NFSLOG_LARGE_REC_NAME }, + { (-1), NULL } +}; + +/* Used to calculate the 'real' allocation size */ +#define ALLOC_SIZE(index) \ + (nfslog_mem_alloc[index].size + sizeof (struct lr_alloc)) + +/* + * Initialize logging data buffer cache + */ +void +nfslog_init() +{ + int indx; + + rw_init(&nfslog_buffer_list_lock, NULL, RW_DEFAULT, NULL); + + /* + * Initialize the kmem caches for encoding + */ + for (indx = 0; nfslog_mem_alloc[indx].size != (-1); indx++) { + nfslog_mem_alloc[indx].mem_cache = + kmem_cache_create(nfslog_mem_alloc[indx].cache_name, + ALLOC_SIZE(indx), + 0, NULL, NULL, NULL, NULL, NULL, 0); + } +} + +/* + * Sets up the necessary log file and related buffers to enable logging + * on the given export point. + * Returns 0 on success, non-zero on failure. + */ +int +nfslog_setup(struct exportinfo *exi) +{ + struct exportdata *kex; + struct log_buffer *lbp; + struct log_buffer *nlbp; + + kex = &exi->exi_export; + ASSERT(kex->ex_flags & EX_LOG); + + /* + * Logging is enabled for the new export point, check + * the existing log_buffer structures to see if the + * desired buffer has already been opened. If so, point + * the new exportinfo's exi_logbuffer to the existing + * one. + */ + rw_enter(&nfslog_buffer_list_lock, RW_READER); + for (lbp = nfslog_buffer_list; lbp != NULL; lbp = lbp->lb_next) { + LOGGING_DPRINT((10, + "searching for buffer... found log_buffer '%s'\n", + lbp->lb_path)); + if (strcmp(lbp->lb_path, kex->ex_log_buffer) == 0) { + /* Found our match. Ref it and return */ + LOG_BUFFER_HOLD(lbp); + exi->exi_logbuffer = lbp; + LOGGING_DPRINT((10, "\tfound log_buffer for '%s'\n", + kex->ex_log_buffer)); + rw_exit(&nfslog_buffer_list_lock); + return (0); + } + } + rw_exit(&nfslog_buffer_list_lock); + + /* + * New buffer needed, allocate it. + * The buffer list lock has been dropped so we will need to search + * the list again to ensure that another thread has not added + * a matching buffer. + */ + if ((nlbp = log_buffer_create(kex->ex_log_buffer)) == NULL) { + /* + * Failed the buffer creation for some reason so we + * will need to return. + */ + return (EIO); + } + + rw_enter(&nfslog_buffer_list_lock, RW_WRITER); + for (lbp = nfslog_buffer_list; lbp != NULL; + lbp = lbp->lb_next) { + if (strcmp(lbp->lb_path, kex->ex_log_buffer) == 0) { + /* + * A log_buffer already exists for the + * indicated buffer, use it instead. + */ + LOG_BUFFER_HOLD(lbp); + + exi->exi_logbuffer = lbp; + + LOGGING_DPRINT((10, + "found log_buffer for '%s' " + "after allocation\n", + kex->ex_log_buffer)); + + rw_exit(&nfslog_buffer_list_lock); + + log_buffer_rele(nlbp); + + return (0); + } + } + /* + * Didn't find an existing log_buffer for this buffer, + * use the the newly created one, and add to list. We + * increment the reference count because the node is + * entered into the global list. + */ + LOGGING_DPRINT((10, "exportfs: adding nlbp=%p to list\n", + nlbp)); + + nlbp->lb_next = nfslog_buffer_list; + nfslog_buffer_list = nlbp; + + LOG_BUFFER_HOLD(nlbp); /* hold is for export entry */ + exi->exi_logbuffer = nlbp; + + rw_exit(&nfslog_buffer_list_lock); + + return (0); +} + +/* + * Disables logging for the given export point. + */ +void +nfslog_disable(struct exportinfo *exi) +{ + log_buffer_rele(exi->exi_logbuffer); +} + +/* + * Creates the corresponding log_buffer and log_file structures + * for the the buffer named 'name'. + * Returns a pointer to the log_buffer structure with reference one. + */ +static struct log_buffer * +log_buffer_create(caddr_t name) +{ + struct log_buffer *buffer; + struct log_file *logfile; + int namelen = strlen(name); + + LOGGING_DPRINT((10, "log_buffer_create: %s\n", name)); + if (log_file_create(name, &logfile)) + return (NULL); + + buffer = (struct log_buffer *)kmem_alloc(sizeof (*buffer), KM_SLEEP); + buffer->lb_refcnt = 1; + buffer->lb_rec_id = 0; + buffer->lb_path = (caddr_t)kmem_alloc(namelen + 1, KM_SLEEP); + bcopy(name, buffer->lb_path, namelen + 1); + buffer->lb_logfile = logfile; + buffer->lb_records = NULL; + buffer->lb_num_recs = 0; + buffer->lb_size_queued = 0; + mutex_init(&buffer->lb_lock, NULL, MUTEX_DEFAULT, NULL); + rfsl_log_buffer++; + + return (buffer); +} + +/* + * Release a log_buffer structure + */ +static void +log_buffer_rele(struct log_buffer *lbp) +{ + int len; + + mutex_enter(&lbp->lb_lock); + if (--lbp->lb_refcnt > 1) { + mutex_exit(&lbp->lb_lock); + return; + } + + if (lbp->lb_refcnt < 0) { + panic("log_rele: log_buffer refcnt < 0"); + /*NOTREACHED*/ + } + + /* + * Need to drop the lb_lock before acquiring the + * nfslog_buffer_list_lock. To avoid double free we need + * to hold an additional reference to the log buffer. + * This will ensure that no two threads will simultaneously + * be trying to free the same log buffer. + */ + + if (lbp->lb_refcnt == 1) { + + /* + * If the ref count is 1, then the last + * unshare/reference has been given up and we need to + * clean up the buffer and remove it from the buffer + * list. + */ + LOGGING_DPRINT((10, + "log_buffer_rele lbp=%p disconnecting\n", lbp)); + /* + * Hold additional reference before dropping the lb_lock + */ + + lbp->lb_refcnt++; + mutex_exit(&lbp->lb_lock); + + /* + * Make sure that all of the buffered records are written. + * Don't bother checking the write return value since there + * isn't much we can do at this point. + */ + (void) nfslog_records_flush_to_disk(lbp); + + rw_enter(&nfslog_buffer_list_lock, RW_WRITER); + mutex_enter(&lbp->lb_lock); + /* + * Drop the reference count held above. + * If the ref count is still > 1 then someone has + * stepped in to use this log buffer. unlock and return. + */ + if (--lbp->lb_refcnt > 1) { + mutex_exit(&lbp->lb_lock); + rw_exit(&nfslog_buffer_list_lock); + return; + } + + if (lbp == nfslog_buffer_list) { + nfslog_buffer_list = lbp->lb_next; + } else { + struct log_buffer *tlbp; + + /* Drop the log_buffer from the master list */ + for (tlbp = nfslog_buffer_list; tlbp->lb_next != NULL; + tlbp = tlbp->lb_next) { + if (tlbp->lb_next == lbp) { + tlbp->lb_next = lbp->lb_next; + break; + } + } + } + + mutex_exit(&lbp->lb_lock); + rw_exit(&nfslog_buffer_list_lock); + } + /* + * ref count zero; finish clean up. + */ + LOGGING_DPRINT((10, "log_buffer_rele lbp=%p freeing\n", lbp)); + + log_file_rele(lbp->lb_logfile); + len = strlen(lbp->lb_path) + 1; + kmem_free(lbp->lb_path, len); + kmem_free(lbp, sizeof (*lbp)); + rfsl_log_buffer--; +} + +/* + * Creates the corresponding log_file structure for the buffer + * named 'log_file_name'. + * 'log_file_name' is created by concatenating 'origname' and LOG_INPROG_STRING. + * 'logfile' is set to be the log_file structure with reference one. + */ +static int +log_file_create(caddr_t origname, struct log_file **lfpp) +{ + vnode_t *vp = NULL; + char *name; + int namelen; + int error; + struct log_file *logfile = NULL; + vattr_t va; + caddr_t loghdr = NULL; + size_t loghdr_len = 0; + size_t loghdr_free = 0; + + namelen = strlen(origname) + strlen(LOG_INPROG_STRING); + name = (caddr_t)kmem_alloc(namelen + 1, KM_SLEEP); + (void) sprintf(name, "%s%s", origname, LOG_INPROG_STRING); + + LOGGING_DPRINT((3, "log_file_create: %s\n", name)); + if (error = vn_open(name, UIO_SYSSPACE, FCREAT|FWRITE|FOFFMAX, + LOG_MODE, &vp, CRCREAT, 0)) { + nfs_cmn_err(error, CE_WARN, + "log_file_create: Can not open %s - error %m", name); + goto out; + } + LOGGING_DPRINT((3, "log_file_create: %s vp=%p v_count=%d\n", + name, vp, vp->v_count)); + + logfile = (struct log_file *)kmem_zalloc(sizeof (*logfile), KM_SLEEP); + logfile->lf_path = name; + /* + * No need to bump the vnode reference count since it is set + * to one by vn_open(). + */ + logfile->lf_vp = vp; + logfile->lf_refcnt = 1; + mutex_init(&logfile->lf_lock, NULL, MUTEX_DEFAULT, NULL); + rfsl_log_file++; + + va.va_mask = AT_SIZE; + error = VOP_GETATTR(vp, &va, 0, CRED()); + if (error) { + nfs_cmn_err(error, CE_WARN, + "log_file_create: Can not stat %s - error = %m", + name); + goto out; + } + + if (va.va_size == 0) { + struct lr_alloc lr; + + /* + * Write Header. + */ + create_buffer_header(&loghdr, &loghdr_len, &loghdr_free); + /* + * Dummy up a lr_alloc struct for the write + */ + lr.next = lr.prev = &lr; + lr.lr_flags = 0; + lr.log_record = loghdr; + lr.size = loghdr_len; + lr.alloc_cache = NULL; + lr.exi = NULL; + lr.lb = NULL; + + mutex_enter(&logfile->lf_lock); + + error = nfslog_write_logrecords(logfile, &lr, 1); + + mutex_exit(&logfile->lf_lock); + + if (error != 0) { + nfs_cmn_err(error, CE_WARN, + "log_file_create: Can not write header " + "on %s - error = %m", name); + goto out; + } + } + *lfpp = logfile; + + if (loghdr != NULL) + kmem_free(loghdr, loghdr_free); + + return (0); + +out: + if (vp != NULL) { + int error1; + error1 = VOP_CLOSE(vp, FCREAT|FWRITE|FOFFMAX, 1, (offset_t)0, + CRED()); + if (error1) { + nfs_cmn_err(error1, CE_WARN, + "log_file_create: Can not close %s - " + "error = %m", name); + } + VN_RELE(vp); + } + + kmem_free(name, namelen + 1); + if (logfile != NULL) { + mutex_destroy(&logfile->lf_lock); + kmem_free(logfile, sizeof (*logfile)); + rfsl_log_file--; + } + if (loghdr != NULL) + kmem_free(loghdr, loghdr_free); + + return (error); +} + +/* + * Release a log_file structure + */ +static void +log_file_rele(struct log_file *lfp) +{ + int len; + int error; + + mutex_enter(&lfp->lf_lock); + if (--lfp->lf_refcnt > 0) { + LOGGING_DPRINT((10, + "log_file_rele lfp=%p decremented refcnt to %d\n", + lfp, lfp->lf_refcnt)); + mutex_exit(&lfp->lf_lock); + return; + } + if (lfp->lf_refcnt < 0) { + panic("log_file_rele: log_file refcnt < 0"); + /*NOTREACHED*/ + } + + LOGGING_DPRINT((10, "log_file_rele lfp=%p freeing node\n", lfp)); + + lfp->lf_flags &= ~(L_PRINTED | L_ERROR); + + ASSERT(lfp->lf_flags == 0); + ASSERT(lfp->lf_writers == 0); + + if (error = VOP_CLOSE(lfp->lf_vp, FCREAT|FWRITE|FOFFMAX, 1, (offset_t)0, + CRED())) { + nfs_cmn_err(error, CE_WARN, + "NFS: Could not close log buffer %s - error = %m", + lfp->lf_path); +#ifdef DEBUG + } else { + LOGGING_DPRINT((3, + "log_file_rele: %s has been closed vp=%p " + "v_count=%d\n", + lfp->lf_path, lfp->lf_vp, lfp->lf_vp->v_count)); +#endif + } + VN_RELE(lfp->lf_vp); + + len = strlen(lfp->lf_path) + 1; + kmem_free(lfp->lf_path, len); + kmem_free(lfp, sizeof (*lfp)); + rfsl_log_file--; +} + +/* + * Allocates a record of the size specified. + * 'exi' identifies the exportinfo structure being logged. + * 'size' indicates how much memory should be allocated + * 'cookie' is used to store an opaque value for the caller for later use + * 'flags' currently ignored. + * + * Returns a pointer to the beginning of the allocated memory. + * 'cookie' is a pointer to the 'lr_alloc' struct; this will be used + * to keep track of the encoded record and contains all the info + * for enqueuing the record on the log buffer for later writing. + * + * nfslog_record_put() must be used to 'free' this record or allocation. + */ +/* ARGSUSED */ +void * +nfslog_record_alloc( + struct exportinfo *exi, + int alloc_indx, + void **cookie, + int flags) +{ + struct lr_alloc *lrp; + + lrp = (struct lr_alloc *) + kmem_cache_alloc(nfslog_mem_alloc[alloc_indx].mem_cache, + KM_NOSLEEP); + + if (lrp == NULL) { + *cookie = NULL; + return (NULL); + } + + lrp->next = lrp; + lrp->prev = lrp; + lrp->lr_flags = 0; + + lrp->log_record = (caddr_t)((uintptr_t)lrp + + (uintptr_t)sizeof (struct lr_alloc)); + lrp->size = nfslog_mem_alloc[alloc_indx].size; + lrp->alloc_cache = nfslog_mem_alloc[alloc_indx].mem_cache; + lrp->exi = exi; + + if (exi->exi_export.ex_flags & EX_LOG) { + LOG_BUFFER_HOLD(exi->exi_logbuffer); + lrp->lb = exi->exi_logbuffer; + } else { + lrp->lb = NULL; + } + + *cookie = (void *)lrp; + + LOGGING_DPRINT((3, + "nfslog_record_alloc(log_buffer=%p mem=%p size=%lu)\n", + exi->exi_logbuffer, lrp->log_record, lrp->size)); + return (lrp->log_record); +} + +/* + * After the above nfslog_record_alloc() has been called and a record + * encoded into the buffer that was returned, this function is called + * to handle appropriate disposition of the newly created record. + * The cookie value is the one that was returned from nfslog_record_alloc(). + * Size is the actual size of the record that was encoded. This is + * passed in because the size used for the alloc was just an approximation. + * The sync parameter is used to tell us if we need to force this record + * to disk and if not it will be queued for later writing. + * + * Note that if the size parameter has a value of 0, then the record is + * not written to the log and the associated data structures are released. + */ +void +nfslog_record_put(void *cookie, size_t size, bool_t sync, + unsigned int which_buffers) +{ + struct lr_alloc *lrp = (struct lr_alloc *)cookie; + struct log_buffer *lbp = lrp->lb; + + /* + * If the caller has nothing to write or if there is + * an apparent error, rele the buffer and free. + */ + if (size == 0 || size > lrp->size) { + nfslog_free_logrecords(lrp); + return; + } + + /* + * Reset the size to what actually needs to be written + * This is used later on when the iovec is built for + * writing the records to the log file. + */ + lrp->size = size; + + /* append to all if public exi */ + if (which_buffers == NFSLOG_ALL_BUFFERS) { + (void) nfslog_record_append2all(lrp); + nfslog_free_logrecords(lrp); + return; + } + + /* Insert the record on the list to be written */ + mutex_enter(&lbp->lb_lock); + if (lbp->lb_records == NULL) { + lbp->lb_records = (caddr_t)lrp; + lbp->lb_num_recs = 1; + lbp->lb_size_queued = lrp->size; + } else { + insque(lrp, ((struct lr_alloc *)lbp->lb_records)->prev); + lbp->lb_num_recs++; + lbp->lb_size_queued += lrp->size; + } + + /* + * Determine if the queue for this log buffer should be flushed. + * This is done by either the number of records queued, the total + * size of all records queued or by the request of the caller + * via the sync parameter. + */ + if (lbp->lb_size_queued >= nfslog_num_bytes_to_write || + lbp->lb_num_recs > nfslog_num_records_to_write || + sync == TRUE) { + mutex_exit(&lbp->lb_lock); + (void) nfslog_records_flush_to_disk(lbp); + } else { + mutex_exit(&lbp->lb_lock); + } + +} + +/* + * Examine the log_buffer struct to see if there are queue log records + * that need to be written to disk. If some exist, pull them off of + * the log buffer and write them to the log file. + */ +static int +nfslog_records_flush_to_disk(struct log_buffer *lbp) +{ + + mutex_enter(&lbp->lb_lock); + + if (lbp->lb_records == NULL) { + mutex_exit(&lbp->lb_lock); + return (0); + } + return (nfslog_records_flush_to_disk_nolock(lbp)); +} + +/* + * Function requires that the caller holds lb_lock. + * Function flushes any records in the log buffer to the disk. + * Function drops the lb_lock on return. + */ + +static int +nfslog_records_flush_to_disk_nolock(struct log_buffer *lbp) +{ + struct log_file *lfp = NULL; + struct lr_alloc *lrp_writers; + int num_recs; + int error = 0; + + ASSERT(MUTEX_HELD(&lbp->lb_lock)); + + lfp = lbp->lb_logfile; + + LOG_FILE_LOCK_TO_WRITE(lfp); + ASSERT(lbp->lb_records != NULL); + + lrp_writers = (struct lr_alloc *)lbp->lb_records; + lbp->lb_records = NULL; + num_recs = lbp->lb_num_recs; + lbp->lb_num_recs = 0; + lbp->lb_size_queued = 0; + mutex_exit(&lbp->lb_lock); + error = nfslog_write_logrecords(lfp, lrp_writers, num_recs); + + LOG_FILE_UNLOCK_FROM_WRITE(lfp); + + nfslog_free_logrecords(lrp_writers); + return (error); +} + + +/* + * Take care of writing the provided log record(s) to the log file. + * We group the log records with an iovec and use VOP_WRITE to append + * them to the end of the log file. + */ +static int +nfslog_write_logrecords(struct log_file *lfp, + struct lr_alloc *lrp_writers, int num_recs) +{ + struct uio uio; + struct iovec *iovp; + int size_iovecs; + vnode_t *vp; + struct vattr va; + struct lr_alloc *lrp; + int i; + ssize_t len; + int ioflag = FAPPEND; + int error = 0; + + ASSERT(MUTEX_HELD(&lfp->lf_lock)); + + vp = lfp->lf_vp; + + size_iovecs = sizeof (struct iovec) * num_recs; + iovp = (struct iovec *)kmem_alloc(size_iovecs, KM_NOSLEEP); + + if (iovp == NULL) { + error = ENOMEM; + goto out; + } + + /* Build the iovec based on the list of log records */ + i = 0; + len = 0; + lrp = lrp_writers; + do { + iovp[i].iov_base = lrp->log_record; + iovp[i].iov_len = lrp->size; + len += lrp->size; + lrp = lrp->next; + i++; + } while (lrp != lrp_writers); + + ASSERT(i == num_recs); + + uio.uio_iov = iovp; + uio.uio_iovcnt = num_recs; + uio.uio_loffset = 0; + uio.uio_segflg = (short)UIO_SYSSPACE; + uio.uio_resid = len; + uio.uio_llimit = (rlim64_t)MAXOFFSET_T; + uio.uio_fmode = FWRITE; + uio.uio_extflg = UIO_COPY_DEFAULT; + + /* + * Save the size. If the write fails, reset the size to avoid + * corrupted log buffer files. + */ + va.va_mask = AT_SIZE; + + (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); /* UIO_WRITE */ + if ((error = VOP_GETATTR(vp, &va, 0, CRED())) == 0) { + if ((len + va.va_size) < (MAXOFF32_T)) { + error = VOP_WRITE(vp, &uio, ioflag, CRED(), NULL); + if (uio.uio_resid) + error = ENOSPC; + if (error) + (void) VOP_SETATTR(vp, &va, 0, CRED(), NULL); + } else { + if (!(lfp->lf_flags & L_PRINTED)) { + cmn_err(CE_WARN, + "NFS Logging: buffer file %s exceeds 2GB; " + "stopped writing buffer \n", lfp->lf_path); + } + error = ENOSPC; + } + } + VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); + + kmem_free(iovp, size_iovecs); + +out: + if (error) { + if (!(lfp->lf_flags & L_PRINTED)) { + nfs_cmn_err(error, CE_WARN, + "NFS Logging disabled for buffer %s - " + "write error = %m\n", lfp->lf_path); + lfp->lf_flags |= L_PRINTED; + } + } else if (lfp->lf_flags & (L_ERROR | L_PRINTED)) { + lfp->lf_flags &= ~(L_ERROR | L_PRINTED); + cmn_err(CE_WARN, + "NFS Logging re-enabled for buffer %s\n", lfp->lf_path); + } + + return (error); +} + +static void +nfslog_free_logrecords(struct lr_alloc *lrp_writers) +{ + struct lr_alloc *lrp = lrp_writers; + struct lr_alloc *lrp_free; + + do { + lrp_free = lrp; + + lrp = lrp->next; + + /* + * Check to see if we are supposed to free this structure + * and relese the log_buffer ref count. + * It may be the case that the caller does not want this + * structure and its record contents freed just yet. + */ + if ((lrp_free->lr_flags & LR_ALLOC_NOFREE) == 0) { + if (lrp_free->lb != NULL) + log_buffer_rele(lrp_free->lb); + if (lrp_free->alloc_cache) /* double check */ + kmem_cache_free(lrp_free->alloc_cache, + (void *)lrp_free); + } else { + /* + * after being pulled from the list the + * pointers need to be reinitialized. + */ + lrp_free->next = lrp_free; + lrp_free->prev = lrp_free; + } + + } while (lrp != lrp_writers); +} + +/* + * Rename lbp->lb_logfile to reflect the true name requested by 'share' + */ +static int +nfslog_logbuffer_rename(struct log_buffer *lbp) +{ + struct log_file *lf; + int error; + struct log_file *logfile; + + /* + * Try our best to get the cache records into the log file + * before the rename occurs. + */ + (void) nfslog_records_flush_to_disk(lbp); + + /* + * Hold lb_lock before retrieving + * lb_logfile. + * Hold a reference to the + * "lf" structure. this is + * same as LOG_FILE_HOLD() + */ + mutex_enter(&(lbp)->lb_lock); + lf = lbp->lb_logfile; + mutex_enter(&(lf)->lf_lock); + mutex_exit(&(lbp)->lb_lock); + lf->lf_refcnt++; + mutex_exit(&(lf)->lf_lock); + + LOGGING_DPRINT((10, "nfslog_logbuffer_rename: renaming %s to %s\n", + lf->lf_path, lbp->lb_path)); + + /* + * rename the current buffer to what the daemon expects + */ + if (error = nfslog_logfile_rename(lf->lf_path, lbp->lb_path)) + goto out; + + /* + * Create a new working buffer file and have all new data sent there. + */ + if (error = log_file_create(lbp->lb_path, &logfile)) { + /* Attempt to rename to original */ + (void) nfslog_logfile_rename(lbp->lb_path, lf->lf_path); + goto out; + } + + /* + * Hold the lb_lock here, this will make + * all the threads trying to access lb->logfile block + * and get a new logfile structure instead of old one. + */ + mutex_enter(&(lbp)->lb_lock); + lbp->lb_logfile = logfile; + mutex_exit(&(lbp)->lb_lock); + + LOG_FILE_RELE(lf); /* release log_buffer's reference */ + + /* + * Wait for log_file to be in a quiescent state before we + * return to our caller to let it proceed with the reading of + * this file. + */ + nfslog_logfile_wait(lf); + +out: + /* + * Release our reference on "lf" in two different cases. + * 1. Error condition, release only the reference + * that we held at the begining of this + * routine on "lf" structure. + * 2. Fall through condition, no errors but the old + * logfile structure "lf" has been replaced with + * the new "logfile" structure, so release the + * reference that was part of the creation of + * "lf" structure to free up the resources. + */ + + LOG_FILE_RELE(lf); + + return (error); +} + +/* + * Renames the 'from' file to 'new'. + */ +static int +nfslog_logfile_rename(char *from, char *new) +{ + int error; + + if (error = vn_rename(from, new, UIO_SYSSPACE)) { + cmn_err(CE_WARN, + "nfslog_logfile_rename: couldn't rename %s to %s\n", + from, new); + } + return (error); +} + +/* + * Wait for the log_file writers to finish before returning + */ +static void +nfslog_logfile_wait(struct log_file *lf) +{ + mutex_enter(&lf->lf_lock); + while (lf->lf_writers > 0) { + lf->lf_flags |= L_WAITING; + (void) cv_wait_sig(&lf->lf_cv_waiters, &lf->lf_lock); + } + mutex_exit(&lf->lf_lock); +} + +static int +nfslog_record_append2all(struct lr_alloc *lrp) +{ + struct log_buffer *lbp, *nlbp; + int error, ret_error = 0; + int lr_flags = lrp->lr_flags; + + rw_enter(&nfslog_buffer_list_lock, RW_READER); + if ((lbp = nfslog_buffer_list) != NULL) + LOG_BUFFER_HOLD(lbp); + for (nlbp = NULL; lbp != NULL; lbp = nlbp) { + if ((nlbp = lbp->lb_next) != NULL) { + /* + * Remember next element in the list + */ + LOG_BUFFER_HOLD(nlbp); + } + rw_exit(&nfslog_buffer_list_lock); + + /* + * Insert the record on the buffer's list to be written + * and then flush the records to the log file. + * Make sure to set the no free flag so that the + * record can be used for the next write + */ + lrp->lr_flags = LR_ALLOC_NOFREE; + + ASSERT(lbp != NULL); + mutex_enter(&lbp->lb_lock); + if (lbp->lb_records == NULL) { + lbp->lb_records = (caddr_t)lrp; + lbp->lb_num_recs = 1; + lbp->lb_size_queued = lrp->size; + } else { + insque(lrp, ((struct lr_alloc *)lbp->lb_records)->prev); + lbp->lb_num_recs++; + lbp->lb_size_queued += lrp->size; + } + + /* + * Flush log records to disk. + * Function is called with lb_lock held. + * Function drops the lb_lock on return. + */ + error = nfslog_records_flush_to_disk_nolock(lbp); + + if (error) { + ret_error = -1; + nfs_cmn_err(error, CE_WARN, + "rfsl_log_pubfh: could not append record to " + "\"%s\" error = %m\n", lbp->lb_path); + } + log_buffer_rele(lbp); + rw_enter(&nfslog_buffer_list_lock, RW_READER); + } + rw_exit(&nfslog_buffer_list_lock); + + lrp->lr_flags = lr_flags; + + return (ret_error); +} + +#ifdef DEBUG +static int logging_debug = 0; + +/* + * 0) no debugging + * 3) current test software + * 10) random stuff + */ +void +nfslog_dprint(const int level, const char *fmt, ...) +{ + va_list args; + + if (logging_debug == level || + (logging_debug > 10 && (logging_debug - 10) >= level)) { + va_start(args, fmt); + (void) vprintf(fmt, args); + va_end(args); + } +} + +#endif /* DEBUG */ + +/* + * NFS Log Flush system call + * Caller must check privileges. + */ +/* ARGSUSED */ +int +nfsl_flush(struct nfsl_flush_args *args, model_t model) +{ + struct flush_thread_params *tparams; + struct nfsl_flush_args *nfsl_args; + int error = 0; + ulong_t buffer_len; + STRUCT_HANDLE(nfsl_flush_args, uap); + + STRUCT_SET_HANDLE(uap, model, args); + + tparams = (struct flush_thread_params *) + kmem_zalloc(sizeof (*tparams), KM_SLEEP); + + nfsl_args = &tparams->tp_args; + nfsl_args->version = STRUCT_FGET(uap, version); + if (nfsl_args->version != NFSL_FLUSH_ARGS_VERS) { + cmn_err(CE_WARN, "nfsl_flush: exected version %d, got %d", + NFSL_FLUSH_ARGS_VERS, nfsl_args->version); + return (EIO); + } + + nfsl_args->directive = STRUCT_FGET(uap, directive); + if ((nfsl_args->directive & NFSL_ALL) == 0) { + /* + * Process a specific buffer + */ + nfsl_args->buff_len = STRUCT_FGET(uap, buff_len); + + nfsl_args->buff = (char *) + kmem_alloc(nfsl_args->buff_len, KM_NOSLEEP); + if (nfsl_args->buff == NULL) + return (ENOMEM); + + error = copyinstr((const char *)STRUCT_FGETP(uap, buff), + nfsl_args->buff, nfsl_args->buff_len, &buffer_len); + if (error) + return (EFAULT); + + if (nfsl_args->buff_len != buffer_len) + return (EFAULT); + } + + LOGGING_DPRINT((10, "nfsl_flush: Flushing %s buffer(s)\n", + nfsl_args->directive & NFSL_ALL ? "all" : nfsl_args->buff)); + + if (nfsl_args->directive & NFSL_SYNC) { + /* + * Do the work synchronously + */ + nfslog_do_flush(tparams); + error = tparams->tp_error; + kmem_free(nfsl_args->buff, nfsl_args->buff_len); + kmem_free(tparams, sizeof (*tparams)); + } else { + /* + * Do the work asynchronously + */ + (void) thread_create(NULL, 0, nfslog_do_flush, + tparams, 0, &p0, TS_RUN, minclsyspri); + } + + return (error); +} + +/* + * This is where buffer flushing would occur, but there is no buffering + * at this time. + * Possibly rename the log buffer for processing. + * Sets tparams->ta_error equal to the value of the error that occured, + * 0 otherwise. + * Returns ENOENT if the buffer is not found. + */ +static void +nfslog_do_flush(struct flush_thread_params *tparams) +{ + struct nfsl_flush_args *args; + struct log_buffer *lbp; + int error = ENOENT; + int found = 0; + char *buf_inprog; /* name of buff in progress */ + int buf_inprog_len; + + /* + * Sanity check on the arguments. + */ + if (!tparams) + return; + args = &tparams->tp_args; + if (!args) + return; + + rw_enter(&nfslog_buffer_list_lock, RW_READER); + for (lbp = nfslog_buffer_list; lbp != NULL; lbp = lbp->lb_next) { + if (args->directive & NFSL_ALL) { + (void) nfslog_records_flush_to_disk(lbp); + } else { + if ((strcmp(lbp->lb_path, args->buff) == 0) && + (args->directive & NFSL_RENAME)) { + + error = nfslog_logbuffer_rename(lbp); + found++; + break; + } + } + } + rw_exit(&nfslog_buffer_list_lock); + + if (!found && ((args->directive & NFSL_ALL) == 0) && + (args->directive & NFSL_RENAME)) { + /* + * The specified buffer is not currently in use, + * simply rename the file indicated. + */ + buf_inprog_len = strlen(args->buff) + + strlen(LOG_INPROG_STRING) + 1; + buf_inprog = (caddr_t)kmem_alloc(buf_inprog_len, KM_SLEEP); + (void) sprintf(buf_inprog, "%s%s", + args->buff, LOG_INPROG_STRING); + + error = nfslog_logfile_rename(buf_inprog, args->buff); + + kmem_free(buf_inprog, buf_inprog_len); + } + +out: + if ((args->directive & NFSL_SYNC) == 0) { + /* + * Work was performed asynchronously, the caller is + * no longer waiting for us. + * Free the thread arguments and exit. + */ + kmem_free(args->buff, args->buff_len); + kmem_free(tparams, sizeof (*tparams)); + thread_exit(); + /* NOTREACHED */ + } + + tparams->tp_error = error; +} + +/* + * Generate buffer_header. + * 'loghdr' points the the buffer_header, and *reclen + * contains the length of the buffer. + */ +static void +create_buffer_header(caddr_t *loghdr, size_t *reclen, size_t *freesize) +{ + timestruc_t now; + nfslog_buffer_header lh; + XDR xdrs; + unsigned int final_size; + + + /* pick some size that will hold the buffer_header */ + *freesize = NFSLOG_SMALL_RECORD_SIZE; + + /* + * Fill header + */ + lh.bh_length = 0; /* don't know yet how large it will be */ + lh.bh_version = NFSLOG_BUF_VERSION; + lh.bh_flags = 0; + lh.bh_offset = 0; + gethrestime(&now); + TIMESPEC_TO_TIMESPEC32(&lh.bh_timestamp, &now); + + /* + * Encode the header + */ + *loghdr = (caddr_t)kmem_alloc(*freesize, KM_SLEEP); + xdrmem_create(&xdrs, *loghdr, *freesize, XDR_ENCODE); + + (void) xdr_nfslog_buffer_header(&xdrs, &lh); + + /* + * Reset with final size of the encoded data + */ + final_size = xdr_getpos(&xdrs); + xdr_setpos(&xdrs, 0); + (void) xdr_u_int(&xdrs, &final_size); + + *reclen = (size_t)final_size; +} + +/* + * **************************************************************** + * RPC dispatch table for logging + * Indexed by program, version, proc + * Based on NFS dispatch table. + */ +struct nfslog_proc_disp { + bool_t (*xdrargs)(); + bool_t (*xdrres)(); + bool_t affects_transactions; /* Operation affects transaction */ + /* processing */ +}; + +struct nfslog_vers_disp { + int nfslog_dis_nprocs; /* number of procs */ + struct nfslog_proc_disp *nfslog_dis_proc_table; /* proc array */ +}; + +struct nfslog_prog_disp { + int nfslog_dis_prog; /* program number */ + int nfslog_dis_versmin; /* Minimum version value */ + int nfslog_dis_nvers; /* Number of version values */ + struct nfslog_vers_disp *nfslog_dis_vers_table; /* versions array */ +}; + +static int rfs_log_bad = 0; /* incremented on bad log attempts */ +static int rfs_log_good = 0; /* incremented on successful log attempts */ + +/* + * Define the actions taken per prog/vers/proc: + * + * In some cases, the nl types are the same as the nfs types and a simple + * bcopy should suffice. Rather that define tens of identical procedures, + * simply define these to bcopy. Similarly this takes care of different + * procs that use same parameter struct. + */ + +static struct nfslog_proc_disp nfslog_proc_v2[] = { + /* + * NFS VERSION 2 + */ + + /* RFS_NULL = 0 */ + {xdr_void, xdr_void, FALSE}, + + /* RFS_GETATTR = 1 */ + {xdr_fhandle, xdr_nfslog_getattrres, FALSE}, + + /* RFS_SETATTR = 2 */ + {xdr_nfslog_setattrargs, xdr_nfsstat, TRUE}, + + /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */ + {xdr_void, xdr_void, FALSE}, + + /* RFS_LOOKUP = 4 */ + {xdr_nfslog_diropargs, xdr_nfslog_diropres, TRUE}, + + /* RFS_READLINK = 5 */ + {xdr_fhandle, xdr_nfslog_rdlnres, FALSE}, + + /* RFS_READ = 6 */ + {xdr_nfslog_nfsreadargs, xdr_nfslog_rdresult, TRUE}, + + /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */ + {xdr_void, xdr_void, FALSE}, + + /* RFS_WRITE = 8 */ + {xdr_nfslog_writeargs, xdr_nfslog_writeresult, TRUE}, + + /* RFS_CREATE = 9 */ + {xdr_nfslog_createargs, xdr_nfslog_diropres, TRUE}, + + /* RFS_REMOVE = 10 */ + {xdr_nfslog_diropargs, xdr_nfsstat, TRUE}, + + /* RFS_RENAME = 11 */ + {xdr_nfslog_rnmargs, xdr_nfsstat, TRUE}, + + /* RFS_LINK = 12 */ + {xdr_nfslog_linkargs, xdr_nfsstat, TRUE}, + + /* RFS_SYMLINK = 13 */ + {xdr_nfslog_symlinkargs, xdr_nfsstat, TRUE}, + + /* RFS_MKDIR = 14 */ + {xdr_nfslog_createargs, xdr_nfslog_diropres, TRUE}, + + /* RFS_RMDIR = 15 */ + {xdr_nfslog_diropargs, xdr_nfsstat, TRUE}, + + /* RFS_READDIR = 16 */ + {xdr_nfslog_rddirargs, xdr_nfslog_rddirres, TRUE}, + + /* RFS_STATFS = 17 */ + {xdr_fhandle, xdr_nfslog_statfs, FALSE}, +}; + + +/* + * NFS VERSION 3 + */ + +static struct nfslog_proc_disp nfslog_proc_v3[] = { + + /* NFSPROC3_NULL = 0 */ + {xdr_void, xdr_void, FALSE}, + + /* NFSPROC3_GETATTR = 1 */ + {xdr_nfs_fh3, xdr_nfslog_GETATTR3res, FALSE}, + + /* NFSPROC3_SETATTR = 2 */ + {xdr_nfslog_SETATTR3args, xdr_nfslog_SETATTR3res, TRUE}, + + /* NFSPROC3_LOOKUP = 3 */ + {xdr_nfslog_diropargs3, xdr_nfslog_LOOKUP3res, TRUE}, + + /* NFSPROC3_ACCESS = 4 */ + {xdr_nfslog_ACCESS3args, xdr_nfslog_ACCESS3res, FALSE}, + + /* NFSPROC3_READLINK = 5 */ + {xdr_nfs_fh3, xdr_nfslog_READLINK3res, FALSE}, + + /* NFSPROC3_READ = 6 */ + {xdr_nfslog_READ3args, xdr_nfslog_READ3res, TRUE}, + + /* NFSPROC3_WRITE = 7 */ + {xdr_nfslog_WRITE3args, xdr_nfslog_WRITE3res, TRUE}, + + /* NFSPROC3_CREATE = 8 */ + {xdr_nfslog_CREATE3args, xdr_nfslog_CREATE3res, TRUE}, + + /* NFSPROC3_MKDIR = 9 */ + {xdr_nfslog_MKDIR3args, xdr_nfslog_MKDIR3res, TRUE}, + + /* NFSPROC3_SYMLINK = 10 */ + {xdr_nfslog_SYMLINK3args, xdr_nfslog_SYMLINK3res, TRUE}, + + /* NFSPROC3_MKNOD = 11 */ + {xdr_nfslog_MKNOD3args, xdr_nfslog_MKNOD3res, TRUE}, + + /* NFSPROC3_REMOVE = 12 */ + {xdr_nfslog_REMOVE3args, xdr_nfslog_REMOVE3res, TRUE}, + + /* NFSPROC3_RMDIR = 13 */ + {xdr_nfslog_RMDIR3args, xdr_nfslog_RMDIR3res, TRUE}, + + /* NFSPROC3_RENAME = 14 */ + {xdr_nfslog_RENAME3args, xdr_nfslog_RENAME3res, TRUE}, + + /* NFSPROC3_LINK = 15 */ + {xdr_nfslog_LINK3args, xdr_nfslog_LINK3res, TRUE}, + + /* NFSPROC3_READDIR = 16 */ + {xdr_nfslog_READDIR3args, xdr_nfslog_READDIR3res, TRUE}, + + /* NFSPROC3_READDIRPLUS = 17 */ + {xdr_nfslog_READDIRPLUS3args, xdr_nfslog_READDIRPLUS3res, TRUE}, + + /* NFSPROC3_FSSTAT = 18 */ + {xdr_nfslog_FSSTAT3args, xdr_nfslog_FSSTAT3res, FALSE}, + + /* NFSPROC3_FSINFO = 19 */ + {xdr_nfslog_FSINFO3args, xdr_nfslog_FSINFO3res, FALSE}, + + /* NFSPROC3_PATHCONF = 20 */ + {xdr_nfslog_PATHCONF3args, xdr_nfslog_PATHCONF3res, FALSE}, + + /* NFSPROC3_COMMIT = 21 */ + {xdr_nfslog_COMMIT3args, xdr_nfslog_COMMIT3res, FALSE}, +}; + +static struct nfslog_proc_disp nfslog_proc_v1[] = { + /* + * NFSLOG VERSION 1 + */ + + /* NFSLOG_NULL = 0 */ + {xdr_void, xdr_void, TRUE}, + + /* NFSLOG_SHARE = 1 */ + {xdr_nfslog_sharefsargs, xdr_nfslog_sharefsres, TRUE}, + + /* NFSLOG_UNSHARE = 2 */ + {xdr_nfslog_sharefsargs, xdr_nfslog_sharefsres, TRUE}, + + /* NFSLOG_LOOKUP = 3 */ + {xdr_nfslog_diropargs3, xdr_nfslog_LOOKUP3res, TRUE}, + + /* NFSLOG_GETFH = 4 */ + {xdr_nfslog_getfhargs, xdr_nfsstat, TRUE}, +}; + +static struct nfslog_vers_disp nfslog_vers_disptable[] = { + {sizeof (nfslog_proc_v2) / sizeof (nfslog_proc_v2[0]), + nfslog_proc_v2}, + {sizeof (nfslog_proc_v3) / sizeof (nfslog_proc_v3[0]), + nfslog_proc_v3}, +}; + +static struct nfslog_vers_disp nfslog_nfslog_vers_disptable[] = { + {sizeof (nfslog_proc_v1) / sizeof (nfslog_proc_v1[0]), + nfslog_proc_v1}, +}; + +static struct nfslog_prog_disp nfslog_dispatch_table[] = { + {NFS_PROGRAM, NFS_VERSMIN, + (sizeof (nfslog_vers_disptable) / + sizeof (nfslog_vers_disptable[0])), + nfslog_vers_disptable}, + + {NFSLOG_PROGRAM, NFSLOG_VERSMIN, + (sizeof (nfslog_nfslog_vers_disptable) / + sizeof (nfslog_nfslog_vers_disptable[0])), + nfslog_nfslog_vers_disptable}, +}; + +static int nfslog_dispatch_table_arglen = sizeof (nfslog_dispatch_table) / + sizeof (nfslog_dispatch_table[0]); + +/* + * This function will determine the appropriate export info struct to use + * and allocate a record id to be used in the written log buffer. + * Usually this is a straightforward operation but the existence of the + * multicomponent lookup and its semantics of crossing file system + * boundaries add to the complexity. See the comments below... + */ +struct exportinfo * +nfslog_get_exi( + struct exportinfo *exi, + struct svc_req *req, + caddr_t res, + unsigned int *nfslog_rec_id) +{ + struct log_buffer *lb; + struct exportinfo *exi_ret = NULL; + fhandle_t *fh = NULL; + nfs_fh3 *fh3; + + if (exi == NULL) + return (NULL); + + /* + * If the exi is marked for logging, allocate a record id and return + */ + if (exi->exi_export.ex_flags & EX_LOG) { + lb = exi->exi_logbuffer; + + /* obtain the unique record id for the caller */ + *nfslog_rec_id = atomic_add_32_nv(&lb->lb_rec_id, (int32_t)1); + + /* + * The caller will expect to be able to exi_rele() it, + * so exi->exi_count must be incremented before it can + * be returned, to make it uniform with exi_ret->exi_count + */ + mutex_enter(&exi->exi_lock); + exi->exi_count++; + mutex_exit(&exi->exi_lock); + + return (exi); + } + + if (exi != exi_public) + return (NULL); + + /* + * Here we have an exi that is not marked for logging. + * It is possible that this request is a multicomponent lookup + * that was done from the public file handle (not logged) and + * the resulting file handle being returned to the client exists + * in a file system that is being logged. If this is the case + * we need to log this multicomponent lookup to the appropriate + * log buffer. This will allow for the appropriate path name + * mapping to occur at user level. + */ + if (req->rq_prog == NFS_PROGRAM) { + switch (req->rq_vers) { + case NFS_V3: + if ((req->rq_proc == NFSPROC3_LOOKUP) && + (((LOOKUP3res *)res)->status == NFS3_OK)) { + fh3 = &((LOOKUP3res *)res)->res_u.ok.object; + if (fh3->fh3_length == sizeof (fhandle_t)) + fh = &fh3->fh3_u.nfs_fh3_i.fh3_i; + } + break; + + case NFS_VERSION: + if ((req->rq_proc == RFS_LOOKUP) && + (((struct nfsdiropres *) + res)->dr_status == NFS_OK)) { + fh = + &((struct nfsdiropres *)res)->dr_u.dr_drok_u.drok_fhandle; + } + break; + default: + break; + } + if (fh != NULL) { + exi_ret = checkexport(&fh->fh_fsid, + (fid_t *)&fh->fh_xlen); + } + } + + if (exi_ret != NULL && exi_ret->exi_export.ex_flags & EX_LOG) { + lb = exi_ret->exi_logbuffer; + /* obtain the unique record id for the caller */ + *nfslog_rec_id = atomic_add_32_nv(&lb->lb_rec_id, (int32_t)1); + + return (exi_ret); + } + return (NULL); +} + +#ifdef DEBUG +static long long rfslog_records_ignored = 0; +#endif + +/* + * nfslog_write_record - Fill in the record buffer for writing out. + * If logrecp is null, log it, otherwise, malloc the record and return it. + * + * It is the responsibility of the caller to check whether this exportinfo + * has logging enabled. + * Note that nfslog_share_public_record() only needs to check for the + * existence of at least one logbuffer to which the public filehandle record + * needs to be logged. + */ +void +nfslog_write_record(struct exportinfo *exi, struct svc_req *req, + caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb, + unsigned int record_id, unsigned int which_buffers) +{ + struct nfslog_prog_disp *progtable; /* prog struct */ + struct nfslog_vers_disp *verstable; /* version struct */ + struct nfslog_proc_disp *disp = NULL; /* proc struct */ + int i, vers; + void *log_cookie; /* for logrecord if */ + caddr_t buffer; + XDR xdrs; + unsigned int final_size; + int encode_ok; + int alloc_indx; + + ASSERT(exi != NULL); ASSERT(req != NULL); ASSERT(args != NULL); + ASSERT(res != NULL); ASSERT(cr != NULL); + + /* + * Find program element + * Search the list since program can not be used as index + */ + for (i = 0; (i < nfslog_dispatch_table_arglen); i++) { + if (req->rq_prog == nfslog_dispatch_table[i].nfslog_dis_prog) + break; + } + if (i >= nfslog_dispatch_table_arglen) { /* program not logged */ + /* not an error */ + return; + } + + /* + * Extract the dispatch functions based on program/version + */ + progtable = &nfslog_dispatch_table[i]; + vers = req->rq_vers - progtable->nfslog_dis_versmin; + verstable = &progtable->nfslog_dis_vers_table[vers]; + disp = &verstable->nfslog_dis_proc_table[req->rq_proc]; + + if (!(exi->exi_export.ex_flags & EX_LOG_ALLOPS) && + !disp->affects_transactions) { + /* + * Only interested in logging operations affecting + * transaction generation. This is not one of them. + */ +#ifdef DEBUG + rfslog_records_ignored++; +#endif + return; + } + + switch (req->rq_prog) { + case NFS_PROGRAM: + switch (req->rq_vers) { + case NFS_V3: + switch (req->rq_proc) { + case NFSPROC3_READDIRPLUS: + alloc_indx = MEDIUM_INDX; + break; + default: + alloc_indx = SMALL_INDX; + break; + } + break; + default: + alloc_indx = SMALL_INDX; + break; + } + break; + case NFSLOG_PROGRAM: + alloc_indx = MEDIUM_INDX; + break; + default: + alloc_indx = SMALL_INDX; + break; + } + + do { + encode_ok = FALSE; + + /* Pick the size to alloc; end of the road - return */ + if (nfslog_mem_alloc[alloc_indx].size == (-1)) { + cmn_err(CE_WARN, + "NFSLOG: unable to encode record - prog=%d " + "proc = %d", req->rq_prog, req->rq_proc); + return; + } + + buffer = nfslog_record_alloc(exi, alloc_indx, &log_cookie, 0); + if (buffer == NULL) { + /* Error processing - no space alloced */ + rfs_log_bad++; + cmn_err(CE_WARN, "NFSLOG: can't get record"); + return; + } + + xdrmem_create(&xdrs, buffer, + nfslog_mem_alloc[alloc_indx].size, XDR_ENCODE); + + /* + * Encode the header, args and results of the record + */ + if (xdr_nfslog_request_record(&xdrs, exi, req, cr, pnb, + nfslog_mem_alloc[alloc_indx].size, record_id) && + (*disp->xdrargs)(&xdrs, args) && + (*disp->xdrres)(&xdrs, res)) { + encode_ok = TRUE; + + rfs_log_good++; + /* + * Get the final size of the encoded + * data and insert that length at the + * beginning. + */ + final_size = xdr_getpos(&xdrs); + xdr_setpos(&xdrs, 0); + (void) xdr_u_int(&xdrs, &final_size); + } else { + /* Oops, the encode failed so we need to free memory */ + nfslog_record_put(log_cookie, 0, FALSE, which_buffers); + alloc_indx++; + } + + } while (encode_ok == FALSE); + + + /* + * Take the final log record and put it in the log file. + * This may be queued to the file internally and written + * later unless the last parameter is TRUE. + * If the record_id is 0 then this is most likely a share/unshare + * request and it should be written synchronously to the log file. + */ + nfslog_record_put(log_cookie, final_size, + (record_id == 0), which_buffers); +} + +static char * +get_publicfh_path(int *alloc_length) +{ + extern struct exportinfo *exi_public; + char *pubpath; + + rw_enter(&exported_lock, RW_READER); + + *alloc_length = exi_public->exi_export.ex_pathlen + 1; + pubpath = kmem_alloc(*alloc_length, KM_SLEEP); + + (void) strcpy(pubpath, exi_public->exi_export.ex_path); + + rw_exit(&exported_lock); + + return (pubpath); +} + +static void +log_public_record(struct exportinfo *exi, cred_t *cr) +{ + struct svc_req req; + struct netbuf nb = {0, 0, NULL}; + int free_length = 0; + diropargs3 args; + LOOKUP3res res; + + bzero(&req, sizeof (req)); + req.rq_prog = NFSLOG_PROGRAM; + req.rq_vers = NFSLOG_VERSION; + req.rq_proc = NFSLOG_LOOKUP; + req.rq_cred.oa_flavor = AUTH_NONE; + + bzero(&args, sizeof (diropargs3)); + bzero(&res, sizeof (LOOKUP3res)); + + args.dir.fh3_length = 0; + if ((args.name = get_publicfh_path(&free_length)) == NULL) + return; + args.dirp = &args.dir; + + res.status = NFS3_OK; + res.res_u.ok.object.fh3_length = 0; + + /* + * Calling this function with the exi_public + * will have the effect of appending the record + * to each of the open log buffers + */ + nfslog_write_record(exi, &req, + (caddr_t)&args, (caddr_t)&res, cr, &nb, 0, NFSLOG_ALL_BUFFERS); + + kmem_free(args.name, free_length); +} + +/* + * nfslog_share_record - logs a share request. + * This is not an NFS request, but we pretend here... + */ +void +nfslog_share_record(struct exportinfo *exi, cred_t *cr) +{ + struct svc_req req; + int res = 0; + struct netbuf nb = {0, 0, NULL}; + + ASSERT(exi != NULL); + + if (nfslog_buffer_list == NULL) + return; + + if (exi->exi_export.ex_flags & EX_LOG) { + bzero(&req, sizeof (req)); + req.rq_prog = NFSLOG_PROGRAM; + req.rq_vers = NFSLOG_VERSION; + req.rq_proc = NFSLOG_SHARE; + req.rq_cred.oa_flavor = AUTH_NONE; + nfslog_write_record(exi, &req, + (caddr_t)exi, (caddr_t)&res, cr, + &nb, 0, NFSLOG_ONE_BUFFER); + } + + log_public_record(exi, cr); +} + +/* + * nfslog_unshare_record - logs an unshare request. + * This is not an NFS request, but we pretend here... + */ +void +nfslog_unshare_record(struct exportinfo *exi, cred_t *cr) +{ + struct svc_req req; + int res = 0; + struct netbuf nb = {0, 0, NULL}; + + ASSERT(exi != NULL); + ASSERT(exi->exi_export.ex_flags & EX_LOG); + + bzero(&req, sizeof (req)); + req.rq_prog = NFSLOG_PROGRAM; + req.rq_vers = NFSLOG_VERSION; + req.rq_proc = NFSLOG_UNSHARE; + req.rq_cred.oa_flavor = AUTH_NONE; + nfslog_write_record(exi, &req, + (caddr_t)exi, (caddr_t)&res, cr, &nb, 0, NFSLOG_ONE_BUFFER); +} + + +void +nfslog_getfh(struct exportinfo *exi, + fhandle *fh, + char *fname, + enum uio_seg seg, + cred_t *cr) +{ + struct svc_req req; + int res = 0; + struct netbuf nb = {0, 0, NULL}; + int error = 0; + char *namebuf; + size_t len; + nfslog_getfhargs gfh; + + ASSERT(exi != NULL); + ASSERT(exi->exi_export.ex_flags & EX_LOG); + + bzero(&req, sizeof (req)); + req.rq_prog = NFSLOG_PROGRAM; + req.rq_vers = NFSLOG_VERSION; + req.rq_proc = NFSLOG_GETFH; + req.rq_cred.oa_flavor = AUTH_NONE; + + namebuf = kmem_alloc(MAXPATHLEN + 4, KM_SLEEP); + if (seg == UIO_USERSPACE) { + error = copyinstr(fname, namebuf, MAXPATHLEN, &len); + } else { + error = copystr(fname, namebuf, MAXPATHLEN, &len); + } + + if (!error) { + gfh.gfh_fh_buf = *fh; + gfh.gfh_path = namebuf; + + nfslog_write_record(exi, &req, + (caddr_t)&gfh, (caddr_t)&res, cr, &nb, 0, + NFSLOG_ONE_BUFFER); + } + kmem_free(namebuf, MAXPATHLEN + 4); +}