Index: bsd/conf/MASTER =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/conf/MASTER,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 MASTER --- bsd/conf/MASTER 5 Sep 2002 20:29:57 -0000 1.1.1.2 +++ bsd/conf/MASTER 3 Dec 2002 20:03:13 -0000 @@ -252,6 +252,8 @@ # pseudo-device bpfilter 4 init bpf_init +pseudo-device systrace 1 init systrace_init + # # shim to "linux" mach disk drivers (mach drivers must also be turned on) # @@ -259,4 +261,3 @@ #pseudo-device diskshim pseudo-device random 1 init random_init - Index: bsd/conf/files =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/conf/files,v retrieving revision 1.1.1.3 diff -u -r1.1.1.3 files --- bsd/conf/files 5 Sep 2002 20:29:59 -0000 1.1.1.3 +++ bsd/conf/files 3 Dec 2002 20:03:13 -0000 @@ -60,6 +60,7 @@ OPTIONS/ktrace optional ktrace OPTIONS/profiling optional profiling OPTIONS/vndevice optional vndevice +OPTIONS/systrace optional systrace # # Network options @@ -462,6 +463,7 @@ bsd/kern/kern_subr.c standard bsd/kern/kern_synch.c standard bsd/kern/kern_sysctl.c standard +bsd/kern/kern_systrace.c optional systrace bsd/kern/kern_newsysctl.c standard bsd/kern/kern_mib.c standard bsd/kern/sysctl_init.c standard Index: bsd/dev/ppc/systemcalls.c =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/dev/ppc/systemcalls.c,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 systemcalls.c --- bsd/dev/ppc/systemcalls.c 5 Sep 2002 20:30:13 -0000 1.1.1.2 +++ bsd/dev/ppc/systemcalls.c 3 Dec 2002 20:03:13 -0000 @@ -20,6 +20,8 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include "systrace.h" + #include #include #include @@ -36,6 +38,7 @@ #include #include #include +#include extern void unix_syscall( @@ -152,7 +155,11 @@ if (KTRPOINT(proc, KTR_SYSCALL)) ktrsyscall(proc, code, callp->sy_narg, uthread->uu_arg, funnel_type); - +#if NSYSTRACE > 0 + if ((proc->p_flag & 
P_SYSTRACE) && (funnel_type != THR_FUNNEL_NULL)) + error = systrace_enter(proc, code, uthread->uu_arg); + if (!error) +#endif error = (*(callp->sy_call))(proc, (void *)uthread->uu_arg, &(uthread->uu_rval[0])); regs = find_user_regs(thread_act); @@ -173,7 +180,10 @@ if (KTRPOINT(proc, KTR_SYSRET)) ktrsysret(proc, code, error, uthread->uu_rval[0], funnel_type); - +#if NSYSTRACE > 0 + if ((proc->p_flag & P_SYSTRACE) && (funnel_type != THR_FUNNEL_NULL)) + systrace_exit(proc, code, uthread->uu_arg, &(uthread->uu_rval[0]), error); +#endif if(funnel_type == KERNEL_FUNNEL) exit_funnel_section(kernel_flock); else if (funnel_type == NETWORK_FUNNEL) Index: bsd/kern/kern_exec.c =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/kern/kern_exec.c,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 kern_exec.c --- bsd/kern/kern_exec.c 5 Sep 2002 20:30:24 -0000 1.1.1.2 +++ bsd/kern/kern_exec.c 3 Dec 2002 20:03:14 -0000 @@ -656,7 +656,8 @@ if (load_result.unixproc) { int pathptr; - ucp = ucp - nc - NBPW; /* begining of the STRING AREA */ + /* begining of the STRING AREA */ + ucp = ucp - nc - NBPW - STACKGAPLEN; /* * Support for new app package launching for Mac OS X allocates Index: bsd/kern/kern_exit.c =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/kern/kern_exit.c,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 kern_exit.c --- bsd/kern/kern_exit.c 5 Sep 2002 20:30:24 -0000 1.1.1.2 +++ bsd/kern/kern_exit.c 3 Dec 2002 20:03:14 -0000 @@ -59,6 +59,8 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 */ + +#include "systrace.h" #include #include @@ -91,6 +93,7 @@ #if KTRACE #include #endif +#include extern char init_task_failure_data[]; int exit1 __P((struct proc *, int, int *)); @@ -319,6 +322,10 @@ p->p_tracep = NULL; vrele(tvp); } +#endif +#if NSYSTRACE > 0 + if (p->p_flag & P_SYSTRACE) + systrace_sys_exit(p); #endif q = p->p_children.lh_first; Index: 
bsd/kern/kern_fork.c =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/kern/kern_fork.c,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 kern_fork.c --- bsd/kern/kern_fork.c 5 Sep 2002 20:30:24 -0000 1.1.1.2 +++ bsd/kern/kern_fork.c 3 Dec 2002 20:03:15 -0000 @@ -60,6 +60,8 @@ * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95 */ +#include "systrace.h" + #include #include #include @@ -74,6 +76,7 @@ #if KTRACE #include #endif +#include #include #include @@ -163,6 +166,11 @@ newproc->p_flag |= P_INVFORK; newproc->p_vforkact = cur_act; +#if NSYSTRACE > 0 + /* Tell systrace what's happening. */ + if (p->p_flag & P_SYSTRACE) + systrace_sys_fork(p, newproc); +#endif ut->uu_flag |= P_VFORK; ut->uu_proc = newproc; ut->uu_userstate = (void *)act_thread_csave(); @@ -368,6 +376,13 @@ LIST_INSERT_HEAD(&allproc, p2, p_list); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); TAILQ_INIT(&p2->p_evlist); + +#if NSYSTRACE > 0 + /* Tell systrace what's happening. */ + if (p1->p_flag & P_SYSTRACE) + systrace_sys_fork(p1, p2); +#endif + /* * Make child runnable, set start time. 
*/ @@ -541,7 +556,6 @@ } #endif return(p2); - } #include Index: bsd/sys/file.h =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/sys/file.h,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 file.h --- bsd/sys/file.h 5 Sep 2002 20:30:54 -0000 1.1.1.2 +++ bsd/sys/file.h 3 Dec 2002 20:03:16 -0000 @@ -81,6 +81,7 @@ #define DTYPE_SOCKET 2 /* communications endpoint */ #define DTYPE_PSXSHM 3 /* POSIX Shared memory */ #define DTYPE_PSXSEM 4 /* POSIX Semaphores */ +#define DTYPE_SYSTRACE 5 /* Systrace fileops */ short f_type; /* descriptor type */ short f_count; /* reference count */ short f_msgcount; /* references from message queue */ Index: bsd/sys/proc.h =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/sys/proc.h,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 proc.h --- bsd/sys/proc.h 5 Sep 2002 20:30:56 -0000 1.1.1.2 +++ bsd/sys/proc.h 3 Dec 2002 20:03:16 -0000 @@ -153,6 +153,8 @@ int p_traceflag; /* Kernel trace points. */ struct vnode *p_tracep; /* Trace to vnode. */ + void *p_systrace; /* Back pointer to systrace */ + sigset_t p_siglist; /* DEPRECATED. */ struct vnode *p_textvp; /* Vnode of executable. */ @@ -308,8 +310,10 @@ /* Should be moved to machine-dependent areas. */ #define P_OWEUPC 0x08000 /* Owe process an addupc() call at next ast. */ +#define P_SYSTRACE 0x10000 /* Process system call tracing active */ + /* XXX Not sure what to do with these, yet. */ -#define P_FSTRACE 0x10000 /* tracing via file system (elsewhere?) */ +/* #define P_FSTRACE 0x10000 */ /* tracing via file system (elsewhere?) */ #define P_SSTEP 0x20000 /* process needs single-step fixup ??? 
*/ #define P_WAITING 0x0040000 /* process has a wait() in progress */ Index: bsd/sys/systm.h =================================================================== RCS file: /Volumes/src/cvs/od/src/xnu/bsd/sys/systm.h,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 systm.h --- bsd/sys/systm.h 5 Sep 2002 20:30:57 -0000 1.1.1.2 +++ bsd/sys/systm.h 3 Dec 2002 20:03:16 -0000 @@ -142,6 +142,8 @@ #define getenv_int(a,b) (*b = 0) #define KASSERT(exp,msg) +#define STACKGAPLEN 512 + /* * General function declarations. */ --- /dev/null Tue Dec 3 14:27:21 2002 +++ bsd/kern/kern_systrace.c Tue Dec 3 14:39:54 2002 @@ -0,0 +1,1772 @@ +/* $OpenBSD: systrace.c,v 1.25 2002/11/10 04:34:56 art Exp $ */ +/* + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +caddr_t stackgap_init(const struct proc *, size_t); +void *stackgap_alloc(const struct proc *, caddr_t *, size_t); +struct proc *systrace_curproc(void); + +int systraceopen(dev_t, int, int, struct proc *); +int systraceclose(dev_t, int, int, struct proc *); +int systraceread(dev_t, struct uio *, int); +int systracewrite(dev_t, struct uio *, int); +int systraceioctl(dev_t, u_long, caddr_t, int, struct proc *); +int systraceselect(dev_t, int, struct proc *); + +uid_t systrace_seteuid(struct proc *, uid_t); +gid_t systrace_setegid(struct proc *, gid_t); +int systracef_read(struct file *, struct uio *, struct ucred *, int, struct proc *); +int systracef_write(struct file *, struct uio *, struct ucred *, int, struct proc *); +int systracef_ioctl(struct file *, u_long, caddr_t, struct proc *p); +int systracef_select(struct file *, int, void *, struct proc *); +int systracef_close(struct file *, struct proc *); + +struct str_policy { + TAILQ_ENTRY(str_policy) next; + + int nr; + + int refcount; + + int nsysent; + u_char *sysent; +}; + +#define STR_PROC_ONQUEUE 0x01 +#define STR_PROC_WAITANSWER 0x02 +#define STR_PROC_SYSCALLRES 0x04 +#define STR_PROC_REPORT 0x08 /* Report emulation */ 
+#define STR_PROC_NEEDSEQNR 0x10 /* Answer must quote seqnr */ +#define STR_PROC_SETEUID 0x20 /* Elevate privileges */ +#define STR_PROC_SETEGID 0x40 + +struct str_process { + TAILQ_ENTRY(str_process) next; + TAILQ_ENTRY(str_process) msg_next; + + struct proc *proc; + int forcereport; + uid_t olduid; + gid_t oldgid; + + pid_t pid; + + struct fsystrace *parent; + struct str_policy *policy; + + struct systrace_replace *replace; + + int flags; + short answer; + short error; + u_int16_t seqnr; /* expected reply sequence number */ + + uid_t seteuid; + uid_t saveuid; + gid_t setegid; + gid_t savegid; + + struct str_message msg; +}; + +void systrace_lock(void); +void systrace_unlock(void); + +/* Needs to be called with fst locked */ + +int systrace_attach(struct fsystrace *, pid_t); +int systrace_detach(struct str_process *); +int systrace_answer(struct str_process *, struct systrace_answer *); +int systrace_io(struct str_process *, struct systrace_io *); +int systrace_policy(struct fsystrace *, struct systrace_policy *); +int systrace_preprepl(struct str_process *, struct systrace_replace *); +int systrace_replace(struct str_process *, size_t, register_t []); +int systrace_getcwd(struct fsystrace *, struct str_process *); + +int systrace_processready(struct str_process *); +struct proc *systrace_find(struct str_process *); +struct str_process *systrace_findpid(struct fsystrace *fst, pid_t pid); +void systrace_wakeup(struct fsystrace *); +void systrace_closepolicy(struct fsystrace *, struct str_policy *); +int systrace_insert_process(struct fsystrace *, struct proc *); +struct str_policy *systrace_newpolicy(struct fsystrace *, int); +int systrace_msg_child(struct fsystrace *, struct str_process *, pid_t); +int systrace_msg_ask(struct fsystrace *, struct str_process *, + int, size_t, register_t []); +int systrace_msg_result(struct fsystrace *, struct str_process *, + int, int, size_t, register_t [], register_t []); +int systrace_msg_emul(struct fsystrace *, struct 
str_process *); +int systrace_msg_ugid(struct fsystrace *, struct str_process *); +int systrace_make_msg(struct str_process *, int, void *, size_t); + +#define SYSTRACE_MAJOR -1 /* kernel picks major */ + +/* + * A struct describing which functions will get invoked for certain + * actions. + */ +static struct cdevsw systrace_cdevsw = +{ + systraceopen, /* open */ + systraceclose, /* close */ + systraceread, /* read */ + systracewrite, /* write */ + systraceioctl, /* ioctl */ + nulldev, /* stop */ + nulldev, /* reset */ + NULL, /* tty's */ + eno_select, /* select */ + eno_mmap, /* mmap */ + eno_strat, /* strategy */ + eno_getc, /* getc */ + eno_putc, /* putc */ + 0 /* type */ +}; + +static struct fileops systracefops = { + systracef_read, + systracef_write, + systracef_ioctl, + systracef_select, + systracef_close +}; + +static int gSystraceInstalled = 0; + +int systrace_debug = 0; +static struct lock__bsd__ systrace_lck; + +#define DPRINTF(y) if (systrace_debug) printf y; + +/* + * Called to initialize our device, + * and to register ourselves with devfs + */ + +void +systrace_init() +{ + int ret; + + if (gSystraceInstalled) + return; + + /* install us in the file system */ + gSystraceInstalled = 1; + + lockinit(&systrace_lck, PLOCK, "systrace", 0, 0); + + ret = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw); + if (ret < 0) { + printf("systrace_init: failed to allocate a major number!\n"); + gSystraceInstalled = 0; + return; + } + + devfs_make_node(makedev (ret, 0), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0644, "systrace", 0); +} + +struct proc * +systrace_curproc(void) +{ + thread_act_t thread_act; + struct uthread *uthread; + struct proc *proc; + + thread_act = current_act(); + uthread = get_bsdthread_info(thread_act); + + if (!(uthread->uu_flag & P_VFORK)) + proc = (struct proc *)get_bsdtask_info(current_task()); + else + proc = current_proc(); + + return (proc); +} + +/* ARGSUSED */ +int +systracef_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags, + 
struct proc *p) +{ + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + struct str_process *process; + int error = 0; + + if (uio->uio_resid != sizeof(struct str_message)) + return (EINVAL); + + again: + systrace_lock(); + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + if ((process = TAILQ_FIRST(&fst->messages)) != NULL) { + error = uiomove((caddr_t)&process->msg, + sizeof(struct str_message), uio); + if (!error) { + TAILQ_REMOVE(&fst->messages, process, msg_next); + CLR(process->flags, STR_PROC_ONQUEUE); + + if (SYSTR_MSG_NOPROCESS(process)) + FREE(process, M_TEMP); + + } + } else if (TAILQ_FIRST(&fst->processes) == NULL) { + /* EOF situation */ + ; + } else { + if (fp->f_flag & FNONBLOCK) + error = EAGAIN; + else { + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + error = tsleep(fst, PWAIT|PCATCH, "systrrd", 0); + if (error) + goto out; + goto again; + } + + } + + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + out: + return (error); +} + +/* ARGSUSED */ +int +systracef_write(struct file *fp, struct uio *uio, struct ucred *cred, + int flags, struct proc *p) +{ + return (EIO); +} + +#define POLICY_VALID(x) ((x) == SYSTR_POLICY_PERMIT || \ + (x) == SYSTR_POLICY_ASK || \ + (x) == SYSTR_POLICY_NEVER) + +/* ARGSUSED */ +int +systracef_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p) +{ + int ret = 0; + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + struct filedesc *fdp; + struct str_process *strp; + pid_t pid = 0; + + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return (0); + + case STRIOCDETACH: + case STRIOCREPORT: + pid = *(pid_t *)data; + if (!pid) + ret = EINVAL; + break; + case STRIOCANSWER: + pid = ((struct systrace_answer *)data)->stra_pid; + if (!pid) + ret = EINVAL; + break; + case STRIOCIO: + pid = ((struct systrace_io *)data)->strio_pid; + if (!pid) + ret = EINVAL; + break; + case STRIOCGETCWD: + pid = *(pid_t *)data; + if (!pid) + ret = EINVAL; + break; + case STRIOCATTACH: + case STRIOCRESCWD: + case 
STRIOCPOLICY: + break; + case STRIOCREPLACE: + pid = ((struct systrace_replace *)data)->strr_pid; + if (!pid) + ret = EINVAL; + break; + default: + ret = EINVAL; + break; + } + + if (ret) + return (ret); + + systrace_lock(); + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + if (pid) { + strp = systrace_findpid(fst, pid); + if (strp == NULL) { + ret = ESRCH; + goto unlock; + } + } + + switch (cmd) { + case STRIOCATTACH: + pid = *(pid_t *)data; + if (!pid) + ret = EINVAL; + else + ret = systrace_attach(fst, pid); + DPRINTF(("%s: attach to %u: %d\n", __func__, pid, ret)); + break; + case STRIOCDETACH: + ret = systrace_detach(strp); + break; + case STRIOCREPORT: + SET(strp->flags, STR_PROC_REPORT); + break; + case STRIOCANSWER: + ret = systrace_answer(strp, (struct systrace_answer *)data); + break; + case STRIOCIO: + ret = systrace_io(strp, (struct systrace_io *)data); + break; + case STRIOCPOLICY: + ret = systrace_policy(fst, (struct systrace_policy *)data); + break; + case STRIOCREPLACE: + ret = systrace_preprepl(strp, (struct systrace_replace *)data); + break; + case STRIOCRESCWD: + if (!fst->fd_pid) { + ret = EINVAL; + break; + } + fdp = p->p_fd; + + /* Release cwd from other process */ + if (fdp->fd_cdir) + vrele(fdp->fd_cdir); + if (fdp->fd_rdir) + vrele(fdp->fd_rdir); + /* This restores the cwd we had before */ + fdp->fd_cdir = fst->fd_cdir; + fdp->fd_rdir = fst->fd_rdir; + /* Note that we are normal again */ + fst->fd_pid = 0; + fst->fd_cdir = fst->fd_rdir = NULL; + break; + case STRIOCGETCWD: + ret = systrace_getcwd(fst, strp); + break; + default: + ret = EINVAL; + break; + } + + unlock: + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + return (ret); +} + +/* ARGSUSED */ +int +systracef_select(struct file *fp, int which, void *wql, struct proc *p) +{ + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + int ready = 0; + + if (which != FREAD) + return (0); + + systrace_lock(); + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + 
systrace_unlock(); + ready = TAILQ_FIRST(&fst->messages) != NULL; + if (!ready) + selrecord(p, &fst->si, wql); + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + + return (ready); +} + +/* ARGSUSED */ +int +systracef_stat(fp, sb, p) + struct file *fp; + struct stat *sb; + struct proc *p; +{ + return (EOPNOTSUPP); +} + +/* ARGSUSED */ +int +systracef_close(fp, p) + struct file *fp; + struct proc *p; +{ + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + struct str_process *strp; + struct str_policy *strpol; + + systrace_lock(); + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + /* Untrace all processes */ + for (strp = TAILQ_FIRST(&fst->processes); strp; + strp = TAILQ_FIRST(&fst->processes)) { + struct proc *q = strp->proc; + + systrace_detach(strp); + psignal(q, SIGKILL); + } + + /* Clean up fork and exit messages */ + for (strp = TAILQ_FIRST(&fst->messages); strp; + strp = TAILQ_FIRST(&fst->messages)) { + TAILQ_REMOVE(&fst->messages, strp, msg_next); + FREE(strp, M_TEMP); + } + + /* Clean up all policies */ + for (strpol = TAILQ_FIRST(&fst->policies); strpol; + strpol = TAILQ_FIRST(&fst->policies)) + systrace_closepolicy(fst, strpol); + + /* Release vnodes */ + if (fst->fd_cdir) + vrele(fst->fd_cdir); + if (fst->fd_rdir) + vrele(fst->fd_rdir); + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + + FREE(fp->f_data, M_TEMP); /* was M_XDATA */ + fp->f_data = NULL; + + return (0); +} + +void +systrace_lock(void) +{ + struct proc *curproc = systrace_curproc(); + lockmgr(&systrace_lck, LK_EXCLUSIVE, NULL, curproc); +} + +void +systrace_unlock(void) +{ + struct proc *curproc = systrace_curproc(); + lockmgr(&systrace_lck, LK_RELEASE, NULL, curproc); +} + +int +systraceopen(dev, flag, mode, p) + dev_t dev; + int flag; + int mode; + struct proc *p; +{ + return (0); +} + +int +systraceclose(dev, flag, mode, p) + dev_t dev; + int flag; + int mode; + struct proc *p; +{ + return (0); +} + +int +systraceread(dev, uio, ioflag) + dev_t dev; + struct uio *uio; + 
int ioflag; +{ + return (EIO); +} + +int +systracewrite(dev, uio, ioflag) + dev_t dev; + struct uio *uio; + int ioflag; +{ + return (EIO); +} + +int +systraceioctl(dev, cmd, data, flag, p) + dev_t dev; + u_long cmd; + caddr_t data; + int flag; + struct proc *p; +{ + struct file *f; + struct fsystrace *fst = NULL; + int fd, error; + + switch (cmd) { + case SYSTR_CLONE: + MALLOC(fst, struct fsystrace *, sizeof(struct fsystrace), + M_TEMP, M_WAITOK); /* was M_XDATA */ + + memset(fst, 0, sizeof(struct fsystrace)); + lockinit(&fst->lock, PLOCK, "systrace", 0, 0); + TAILQ_INIT(&fst->processes); + TAILQ_INIT(&fst->messages); + TAILQ_INIT(&fst->policies); + + if (suser(p->p_ucred, &p->p_acflag) == 0) + fst->issuser = 1; + fst->p_ruid = p->p_cred->p_ruid; + fst->p_rgid = p->p_cred->p_rgid; + + error = falloc(p, &f, &fd); + if (error) { + FREE(fst, M_TEMP); /* was M_XDATA */ + return (error); + } + f->f_flag = FREAD | FWRITE; + f->f_type = DTYPE_SYSTRACE; + f->f_ops = &systracefops; + f->f_data = (caddr_t) fst; + *(int *)data = fd; + *fdflags(p, fd) &= ~UF_RESERVED; + break; + default: + error = EINVAL; + break; + } + return (error); +} + +int +systraceselect(dev, rw, p) + dev_t dev; + int rw; + struct proc *p; +{ + return (0); +} + +void +systrace_wakeup(struct fsystrace *fst) +{ + wakeup((caddr_t)fst); + selwakeup(&fst->si); +} + +struct proc * +systrace_find(struct str_process *strp) +{ + struct proc *proc; + + if ((proc = pfind(strp->pid)) == NULL) + return (NULL); + + if (proc != strp->proc) + return (NULL); + + if (!ISSET(proc->p_flag, P_SYSTRACE)) + return (NULL); + + return (proc); +} + +void +systrace_sys_exit(struct proc *proc) +{ + struct proc *curproc = systrace_curproc(); + struct str_process *strp; + struct fsystrace *fst; + + systrace_lock(); + strp = proc->p_systrace; + if (strp != NULL) { + fst = strp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, curproc); + systrace_unlock(); + + /* Insert Exit message */ + systrace_msg_child(fst, strp, -1); + + 
systrace_detach(strp); + lockmgr(&fst->lock, LK_RELEASE, NULL, curproc); + } else + systrace_unlock(); + CLR(proc->p_flag, P_SYSTRACE); +} + +void +systrace_sys_fork(struct proc *oldproc, struct proc *p) +{ + struct proc *curproc = systrace_curproc(); + struct str_process *oldstrp, *strp; + struct fsystrace *fst; + + systrace_lock(); + oldstrp = oldproc->p_systrace; + if (oldstrp == NULL) { + systrace_unlock(); + return; + } + + fst = oldstrp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, curproc); + systrace_unlock(); + + if (systrace_insert_process(fst, p)) + goto out; + if ((strp = systrace_findpid(fst, p->p_pid)) == NULL) + panic("systrace_sys_fork"); + + /* Reference policy */ + if ((strp->policy = oldstrp->policy) != NULL) + strp->policy->refcount++; + + /* Insert fork message */ + systrace_msg_child(fst, oldstrp, p->p_pid); + out: + lockmgr(&fst->lock, LK_RELEASE, NULL, curproc); +} + +int +systrace_enter(struct proc *p, int code, register_t args[]) +{ + struct sysent *callp; + struct str_process *strp; + struct str_policy *strpolicy; + struct fsystrace *fst = NULL; + struct pcred *pc; + int policy, error = 0, maycontrol = 0, issuser = 0; + int argsize; + + if (code < 0 || code >= nsysent) + return (EINVAL); + + systrace_lock(); + strp = p->p_systrace; + if (strp == NULL) { + systrace_unlock(); + return (EINVAL); + } + + assert(strp->proc == p); + + fst = strp->parent; + + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + strp->forcereport = 0; + + /* + * We can not monitor a SUID process unless we are root, + * but we wait until it executes something unprivileged. + * A non-root user may only monitor if the real uid and + * real gid match the monitored process. Changing the + * uid or gid causes P_SUGID to be set. 
+ */ + if (fst->issuser) { + maycontrol = 1; + issuser =1 ; + } else if (!(p->p_flag & P_SUGID)) { + maycontrol = fst->p_ruid == p->p_cred->p_ruid && + fst->p_rgid == p->p_cred->p_rgid; + } + + if (!maycontrol) { + policy = SYSTR_POLICY_PERMIT; + } else { + /* Find out current policy */ + if ((strpolicy = strp->policy) == NULL) + policy = SYSTR_POLICY_ASK; + else { + if (code >= strpolicy->nsysent) + policy = SYSTR_POLICY_NEVER; + else + policy = strpolicy->sysent[code]; + } + } + + callp = &sysent[code]; + argsize = callp->sy_narg * sizeof(int); + + switch (policy) { + case SYSTR_POLICY_PERMIT: + break; + case SYSTR_POLICY_ASK: + /* Puts the current process to sleep, return unlocked */ + error = systrace_msg_ask(fst, strp, code, argsize, args); + + /* lock has been released in systrace_msg_ask() */ + fst = NULL; + + /* We might have detached by now for some reason */ + if (error) + break; + + systrace_lock(); + if ((strp = p->p_systrace) != NULL) { + fst = strp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + if (strp->answer == SYSTR_POLICY_NEVER) { + error = strp->error; + if (strp->replace != NULL) { + FREE(strp->replace, M_TEMP);/*M_XDATA*/ + strp->replace = NULL; + } + } else { + /* Replace the arguments if necessary */ + if (strp->replace != NULL) { + error = systrace_replace(strp, argsize, args); + } + } + } else + systrace_unlock(); + break; + default: + if (policy > 0) + error = policy; + else + error = EPERM; + break; + } + + if (fst) { + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + fst = NULL; + } + + systrace_lock(); + if ((strp = p->p_systrace) == NULL) + goto out; + + if (error) { + strp->forcereport = -1; + goto out; + } + + pc = p->p_cred; + strp->olduid = pc->p_ruid; + strp->oldgid = pc->p_rgid; + + /* Elevate privileges as desired */ + if (issuser) { + if (ISSET(strp->flags, STR_PROC_SETEUID)) + strp->saveuid = systrace_seteuid(p, strp->seteuid); + if (ISSET(strp->flags, STR_PROC_SETEGID)) + strp->savegid = 
systrace_setegid(p, strp->setegid); + } else + CLR(strp->flags, STR_PROC_SETEUID|STR_PROC_SETEGID); + out: + systrace_unlock(); + return (error); +} + +void +systrace_exit(struct proc *p, register_t code, register_t args[], + register_t retval[], int error) +{ + const struct sysent *callp; + struct str_process *strp; + struct fsystrace *fst; + struct pcred *pc; + int argsize; + + systrace_lock(); + strp = p->p_systrace; + if (strp == NULL || strp->forcereport == -1) { + systrace_unlock(); + return; + } + + callp = &sysent[code]; + argsize = callp->sy_narg * sizeof(int); + + /* Return to old privileges */ + if (ISSET(strp->flags, STR_PROC_SETEUID)) { + if (pc->pc_ucred->cr_uid == strp->seteuid) + systrace_seteuid(p, strp->saveuid); + CLR(strp->flags, STR_PROC_SETEUID); + } + if (ISSET(strp->flags, STR_PROC_SETEGID)) { + if (pc->pc_ucred->cr_gid == strp->setegid) + systrace_setegid(p, strp->savegid); + CLR(strp->flags, STR_PROC_SETEGID); + } + + if (p->p_flag & P_SUGID) { + if ((fst = strp->parent) == NULL || !fst->issuser) { + systrace_unlock(); + return; + } + } + + /* See if we should force a report */ + if (strp != NULL && ISSET(strp->flags, STR_PROC_REPORT)) { + CLR(strp->flags, STR_PROC_REPORT); + strp->forcereport = 1; + } + + if (strp->forcereport && strp != NULL) { + fst = strp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + /* Old policy is without meaning now */ + if (strp->policy) { + systrace_closepolicy(fst, strp->policy); + strp->policy = NULL; + } + systrace_msg_emul(fst, strp); + } else + systrace_unlock(); + + systrace_lock(); + if ((strp = p->p_systrace) == NULL) { + systrace_unlock(); + return; + } + + /* Report if effective uid or gid changed */ + if (strp->olduid != p->p_cred->p_ruid || + strp->oldgid != p->p_cred->p_rgid) { + fst = strp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + systrace_msg_ugid(fst, strp); + } else + systrace_unlock(); + + /* Report result from system call 
*/ + systrace_lock(); + if ((strp = p->p_systrace) == NULL) { + systrace_unlock(); + return; + } + + if (ISSET(strp->flags, STR_PROC_SYSCALLRES)) { + fst = strp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + CLR(strp->flags, STR_PROC_SYSCALLRES); + + systrace_msg_result(fst, strp, error, code, argsize, args, retval); + } else + systrace_unlock(); +} + +uid_t +systrace_seteuid(struct proc *p, uid_t euid) +{ + struct pcred *pc = p->p_cred; + uid_t oeuid = pc->pc_ucred->cr_uid; + + if (pc->pc_ucred->cr_uid == euid) + return (oeuid); + + /* + * Copy credentials so other references do not see our changes. + */ + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_uid = euid; + p->p_flag |= P_SUGID; + + return (oeuid); +} + +gid_t +systrace_setegid(struct proc *p, gid_t egid) +{ + struct pcred *pc = p->p_cred; + gid_t oegid = pc->pc_ucred->cr_gid; + + if (pc->pc_ucred->cr_gid == egid) + return (oegid); + + /* + * Copy credentials so other references do not see our changes. 
+ */ + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_gid = egid; + p->p_flag |= P_SUGID; + + return (oegid); +} + +/* Called with fst locked */ + +int +systrace_answer(struct str_process *strp, struct systrace_answer *ans) +{ + int error = 0; + + DPRINTF(("%s: %u: policy %d\n", __func__, + ans->stra_pid, ans->stra_policy)); + + if (!POLICY_VALID(ans->stra_policy)) { + error = EINVAL; + goto out; + } + + /* Check if answer is in sync with us */ + if (ans->stra_seqnr != strp->seqnr) { + error = ESRCH; + goto out; + } + + if ((error = systrace_processready(strp)) != 0) + goto out; + + strp->answer = ans->stra_policy; + strp->error = ans->stra_error; + if (!strp->error) + strp->error = EPERM; + if (ISSET(ans->stra_flags, SYSTR_FLAGS_RESULT)) + SET(strp->flags, STR_PROC_SYSCALLRES); + + /* See if we should elevate privileges for this system call */ + if (ISSET(ans->stra_flags, SYSTR_FLAGS_SETEUID)) { + SET(strp->flags, STR_PROC_SETEUID); + strp->seteuid = ans->stra_seteuid; + } + if (ISSET(ans->stra_flags, SYSTR_FLAGS_SETEGID)) { + SET(strp->flags, STR_PROC_SETEGID); + strp->setegid = ans->stra_setegid; + } + + + /* Clearing the flag indicates to the process that it woke up */ + CLR(strp->flags, STR_PROC_WAITANSWER); + wakeup(strp); + out: + + return (error); +} + +int +systrace_policy(struct fsystrace *fst, struct systrace_policy *pol) +{ + struct str_policy *strpol; + struct str_process *strp; + + switch(pol->strp_op) { + case SYSTR_POLICY_NEW: + DPRINTF(("%s: new, ents %d\n", __func__, + pol->strp_maxents)); + if (pol->strp_maxents <= 0 || pol->strp_maxents > 1024) + return (EINVAL); + strpol = systrace_newpolicy(fst, pol->strp_maxents); + if (strpol == NULL) + return (ENOBUFS); + pol->strp_num = strpol->nr; + break; + case SYSTR_POLICY_ASSIGN: + DPRINTF(("%s: %d -> pid %d\n", __func__, + pol->strp_num, pol->strp_pid)); + + /* Find right policy by number */ + TAILQ_FOREACH(strpol, &fst->policies, next) + if (strpol->nr == pol->strp_num) + break; + if 
(strpol == NULL) + return (EINVAL); + + strp = systrace_findpid(fst, pol->strp_pid); + if (strp == NULL) + return (EINVAL); + + if (strp->policy) + systrace_closepolicy(fst, strp->policy); + strp->policy = strpol; + strpol->refcount++; + + break; + case SYSTR_POLICY_MODIFY: + DPRINTF(("%s: %d: code %d -> policy %d\n", __func__, + pol->strp_num, pol->strp_code, pol->strp_policy)); + if (!POLICY_VALID(pol->strp_policy)) + return (EINVAL); + TAILQ_FOREACH(strpol, &fst->policies, next) + if (strpol->nr == pol->strp_num) + break; + if (strpol == NULL) + return (EINVAL); + if (pol->strp_code < 0 || pol->strp_code >= strpol->nsysent) + return (EINVAL); + strpol->sysent[pol->strp_code] = pol->strp_policy; + break; + default: + return (EINVAL); + } + + return (0); +} + +int +systrace_processready(struct str_process *strp) +{ + if (ISSET(strp->flags, STR_PROC_ONQUEUE)) + return (EBUSY); + + if (!ISSET(strp->flags, STR_PROC_WAITANSWER)) + return (EBUSY); + + /* XXX - BSD person can not tell if a process is sleeping ??? 
+ if (strp->proc->p_stat != SSLEEP) + return (EBUSY); + */ + + return (0); +}
+
+/*
+ * Make the calling process use the traced process's current and root
+ * directories.  The caller's own directory vnodes are saved in fst
+ * together with the traced pid; the code that puts them back is not
+ * part of this function.
+ *
+ * Returns 0 on success, the error from systrace_processready(), or
+ * EINVAL when either process has no file descriptor table.
+ */
+int
+systrace_getcwd(struct fsystrace *fst, struct str_process *strp)
+{
+	struct proc *curproc = systrace_curproc();
+	struct filedesc *mine, *theirs;
+	int rc;
+
+	DPRINTF(("%s: %d\n", __func__, strp->pid));
+
+	rc = systrace_processready(strp);
+	if (rc != 0)
+		return (rc);
+
+	mine = curproc->p_fd;
+	theirs = strp->proc->p_fd;
+	if (mine == NULL || theirs == NULL)
+		return (EINVAL);
+
+	/* Store our current values */
+	fst->fd_pid = strp->pid;
+	fst->fd_cdir = mine->fd_cdir;
+	fst->fd_rdir = mine->fd_rdir;
+
+	/* Switch to the target's directories; VREF() each non-NULL vnode. */
+	mine->fd_cdir = theirs->fd_cdir;
+	if (mine->fd_cdir != NULL)
+		VREF(mine->fd_cdir);
+	mine->fd_rdir = theirs->fd_rdir;
+	if (mine->fd_rdir != NULL)
+		VREF(mine->fd_rdir);
+
+	return (0);
+}
+
+/*
+ * Transfer memory between the calling process and the traced process
+ * on behalf of the monitor.
+ *
+ * io->strio_op selects the direction (SYSTR_READ or SYSTR_WRITE; anything
+ * else is EINVAL), strio_offs is the address in the traced process,
+ * strio_addr/strio_len describe the caller's buffer.  On return
+ * strio_len has been reduced by the residual count, i.e. it holds the
+ * number of bytes actually moved.
+ */
+int
+systrace_io(struct str_process *strp, struct systrace_io *io)
+{
+	struct proc *curproc = systrace_curproc();
+	struct proc *target = strp->proc;
+	struct iovec iov;
+	struct uio uio;
+	int rc;
+
+	DPRINTF(("%s: %u: %p(%lu)\n", __func__,
+	    io->strio_pid, io->strio_offs, (u_long)io->strio_len));
+
+	if (io->strio_op == SYSTR_READ)
+		uio.uio_rw = UIO_READ;
+	else if (io->strio_op == SYSTR_WRITE)
+		uio.uio_rw = UIO_WRITE;
+	else
+		return (EINVAL);
+
+	rc = systrace_processready(strp);
+	if (rc != 0)
+		return (rc);
+
+	/* One user-space segment covering the caller's whole buffer. */
+	iov.iov_base = io->strio_addr;
+	iov.iov_len = io->strio_len;
+
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = (off_t)(long)io->strio_offs;
+	uio.uio_resid = io->strio_len;
+	uio.uio_segflg = UIO_USERSPACE;
+	uio.uio_procp = curproc;
+
+	rc = systrace_domem(curproc, target, NULL, &uio);
+	io->strio_len -= uio.uio_resid;
+
+	return (rc);
+}
+
+int +systrace_attach(struct fsystrace *fst, pid_t pid) +{ + struct proc *curproc = systrace_curproc(); + int error = 0; + struct proc *proc, *p = curproc; + + if ((proc = pfind(pid)) == NULL) { + error = ESRCH; + goto out; + } + + /* + * You can't attach to a process if: + * (1) it's the process that's doing the 
attaching, + */ + if (proc->p_pid == p->p_pid) { + error = EINVAL; + goto out; + } + + /* + * (2) it's a system process + */ + if (ISSET(proc->p_flag, P_SYSTEM)) { + error = EPERM; + goto out; + } + + /* + * (3) it's being traced already + */ + if (ISSET(proc->p_flag, P_SYSTRACE)) { + error = EBUSY; + goto out; + } + + /* + * (4) it's not owned by you, or the last exec + * gave us setuid/setgid privs (unless + * you're root), or... + * + * [Note: once P_SUGID gets set in execve(), it stays + * set until the process does another execve(). Hence + * this prevents a setuid process which revokes it's + * special privilidges using setuid() from being + * traced. This is good security.] + */ + if ((proc->p_cred->p_ruid != p->p_cred->p_ruid || + ISSET(proc->p_flag, P_SUGID)) && + (error = suser(p->p_ucred, &p->p_acflag)) != 0) + goto out; + + /* + * (5) ...it's init, which controls the security level + * of the entire system, and the system was not + * compiled with permanently insecure mode turned + * on. 
+ */ + if ((proc->p_pid == 1) && (securelevel > -1)) { + error = EPERM; + goto out; + } + + error = systrace_insert_process(fst, proc); + + out: + return (error); +}
+
+/*
+ * Prepare to replace arguments.
+ *
+ * Validates the replacement description in *repl, then stores a private
+ * copy of it in strp->replace, immediately followed by strr_len bytes
+ * copied in from repl->strr_base.  strr_base of the stored copy is
+ * rewritten to point at that trailing data.  Any replacement already
+ * attached to strp is freed first.
+ *
+ * Returns 0 on success, the error from systrace_processready() or
+ * copyin(), or EINVAL when the description is inconsistent.
+ */
+int
+systrace_preprepl(struct str_process *strp, struct systrace_replace *repl)
+{
+	const size_t maxlen = 2048;	/* cap on total replacement bytes */
+	size_t len, off, offlen;
+	int i, ret;
+
+	ret = systrace_processready(strp);
+	if (ret)
+		return (ret);
+
+	if (strp->replace != NULL) {
+		FREE(strp->replace, M_TEMP); /* M_XDATA */
+		strp->replace = NULL;
+	}
+
+	if (repl->strr_nrepl < 0 || repl->strr_nrepl > SYSTR_MAXARGS)
+		return (EINVAL);
+
+	for (i = 0, len = 0; i < repl->strr_nrepl; i++) {
+		off = repl->strr_off[i];
+		offlen = repl->strr_offlen[i];
+
+		/* Entries without data take no part in the length check. */
+		if (offlen == 0)
+			continue;
+
+		/*
+		 * Bound every term before adding.  All of these are
+		 * caller-supplied size_t values; without the bounds
+		 * "off + offlen" or the running total could wrap around
+		 * and let an offset far outside the buffer pass the
+		 * range check below.
+		 */
+		if (off > maxlen || offlen > maxlen)
+			return (EINVAL);
+		len += offlen;
+		if (len > maxlen)
+			return (EINVAL);
+
+		/* The segment must lie inside the data counted so far. */
+		if (off + offlen > len)
+			return (EINVAL);
+	}
+
+	/*
+	 * Make sure that the length adds up.  The loop already keeps
+	 * len <= maxlen, so this also enforces the maximum length.
+	 */
+	if (repl->strr_len != len)
+		return (EINVAL);
+
+	MALLOC(strp->replace, struct systrace_replace *,
+	    sizeof(struct systrace_replace) + len,
+	    M_TEMP, M_WAITOK); /* M_XDATA */
+
+	memcpy(strp->replace, repl, sizeof(struct systrace_replace));
+	ret = copyin(repl->strr_base, strp->replace + 1, len);
+	if (ret) {
+		FREE(strp->replace, M_TEMP); /* M_XDATA */
+		strp->replace = NULL;
+		return (ret);
+	}
+
+	/* Adjust the offset */
+	repl = strp->replace;
+	repl->strr_base = (caddr_t)(repl + 1);
+
+	return (0);
+}
+
+/* + * Replace the arguments with arguments from the monitoring process. 
+ *
+ * Consumes strp->replace: the structure is freed and the pointer
+ * cleared on every path.  Room for strr_len bytes is taken from the
+ * stack gap of the traced process.  For each replacement entry the
+ * argument index is range-checked against the number of arguments;
+ * an entry with a zero length stores strr_off itself as the new
+ * argument value, any other entry has its bytes copied out to the
+ * stack gap and the argument replaced by the resulting user address.
+ *
+ * Returns 0 on success or EINVAL on a bad index, a failed copyout or
+ * when the stack gap cannot hold the replacement data.
+ */
+
+int
+systrace_replace(struct str_process *strp, size_t argsize, register_t args[])
+{
+	struct proc *p = strp->proc;
+	struct systrace_replace *repl = strp->replace;
+	caddr_t sg, kdata, udata, kbase, ubase;
+	int i, maxarg, ind, ret = 0;
+
+	maxarg = argsize/sizeof(register_t);
+	sg = stackgap_init(p, 0);
+	ubase = stackgap_alloc(p, &sg, repl->strr_len);
+	if (ubase == NULL) {
+		/*
+		 * stackgap_alloc() returns NULL when the request does not
+		 * fit.  Fail up front instead of building user addresses
+		 * from a NULL base; EINVAL matches the other failures here.
+		 */
+		ret = EINVAL;
+		goto out;
+	}
+
+	kbase = repl->strr_base;
+	for (i = 0; i < maxarg && i < repl->strr_nrepl; i++) {
+		ind = repl->strr_argind[i];
+		if (ind < 0 || ind >= maxarg) {
+			ret = EINVAL;
+			goto out;
+		}
+		if (repl->strr_offlen[i] == 0) {
+			args[ind] = repl->strr_off[i];
+			continue;
+		}
+		kdata = kbase + repl->strr_off[i];
+		udata = ubase + repl->strr_off[i];
+		if (copyout(kdata, udata, repl->strr_offlen[i])) {
+			ret = EINVAL;
+			goto out;
+		}
+
+		/* Replace the argument with the new address */
+		args[ind] = (register_t)udata;
+	}
+
+ out:
+	FREE(repl, M_TEMP); /* M_XDATA */
+	strp->replace = NULL;
+	return (ret);
+}
+
+struct str_process * +systrace_findpid(struct fsystrace *fst, pid_t pid) +{ + struct str_process *strp; + struct proc *proc = NULL; + + TAILQ_FOREACH(strp, &fst->processes, next) + if (strp->pid == pid) + break; + + if (strp == NULL) + return (NULL); + + proc = systrace_find(strp); + + return (proc ? 
strp : NULL); +} + +int +systrace_detach(struct str_process *strp) +{ + struct proc *proc; + struct fsystrace *fst = NULL; + int error = 0; + + DPRINTF(("%s: Trying to detach from %d\n", __func__, strp->pid)); + + if ((proc = systrace_find(strp)) != NULL) { + CLR(proc->p_flag, P_SYSTRACE); + proc->p_systrace = NULL; + } else + error = ESRCH; + + if (ISSET(strp->flags, STR_PROC_WAITANSWER)) { + CLR(strp->flags, STR_PROC_WAITANSWER); + wakeup(strp); + } + + fst = strp->parent; + systrace_wakeup(fst); + + if (ISSET(strp->flags, STR_PROC_ONQUEUE)) + TAILQ_REMOVE(&fst->messages, strp, msg_next); + + TAILQ_REMOVE(&fst->processes, strp, next); + fst->nprocesses--; + + if (strp->policy) + systrace_closepolicy(fst, strp->policy); + if (strp->replace) + FREE(strp->replace, M_TEMP); /* M_XDATA */ + FREE(strp, M_TEMP); + + return (error); +} + +void +systrace_closepolicy(struct fsystrace *fst, struct str_policy *policy) +{ + if (--policy->refcount) + return; + + fst->npolicies--; + + if (policy->nsysent) + FREE(policy->sysent, M_TEMP); /* M_XDATA */ + + TAILQ_REMOVE(&fst->policies, policy, next); + + FREE(policy, M_TEMP); +} + + +int +systrace_insert_process(struct fsystrace *fst, struct proc *proc) +{ + struct str_process *strp; + + MALLOC(strp, struct str_process *, sizeof(struct str_process), M_TEMP, + M_NOWAIT); + if (strp == NULL) + return (ENOBUFS); + + memset((caddr_t)strp, 0, sizeof(struct str_process)); + strp->pid = proc->p_pid; + strp->proc = proc; + strp->parent = fst; + + TAILQ_INSERT_TAIL(&fst->processes, strp, next); + fst->nprocesses++; + + proc->p_systrace = strp; + SET(proc->p_flag, P_SYSTRACE); + + return (0); +} + +struct str_policy * +systrace_newpolicy(struct fsystrace *fst, int maxents) +{ + struct str_policy *pol; + int i; + + if (fst->npolicies > SYSTR_MAX_POLICIES && !fst->issuser) + return (NULL); + + MALLOC(pol, struct str_policy *, sizeof(struct str_policy), M_TEMP, + M_NOWAIT); + if (pol == NULL) + return (NULL); + + DPRINTF(("%s: allocating %d -> 
%lu\n", __func__, + maxents, (u_long)maxents * sizeof(int))); + + memset((caddr_t)pol, 0, sizeof(struct str_policy)); + + MALLOC(pol->sysent, u_char *, maxents * sizeof(u_char), + M_TEMP, M_WAITOK); /* M_XDATA */ + pol->nsysent = maxents; + for (i = 0; i < maxents; i++) + pol->sysent[i] = SYSTR_POLICY_ASK; + + fst->npolicies++; + pol->nr = fst->npolicynr++; + pol->refcount = 1; + + TAILQ_INSERT_TAIL(&fst->policies, pol, next); + + return (pol); +}
+
+/*
+ * Post a SYSTR_MSG_ASK message describing a system call (its number,
+ * argument size and up to SYSTR_MAXARGS arguments) for strp.
+ * Returns whatever systrace_make_msg() returns.
+ */
+int
+systrace_msg_ask(struct fsystrace *fst, struct str_process *strp,
+    int code, size_t argsize, register_t args[])
+{
+	struct str_msg_ask msg_ask;
+	int i;
+
+	/*
+	 * The whole structure is handed to systrace_make_msg() with
+	 * sizeof(msg_ask).  Zero it first so that rval, result and the
+	 * unused argument slots never carry stale kernel stack contents.
+	 */
+	memset(&msg_ask, 0, sizeof(msg_ask));
+
+	msg_ask.code = code;
+	msg_ask.argsize = argsize;
+	for (i = 0; i < (argsize/sizeof(register_t)) && i < SYSTR_MAXARGS; i++)
+		msg_ask.args[i] = args[i];
+
+	return (systrace_make_msg(strp, SYSTR_MSG_ASK,
+	    &msg_ask, sizeof(msg_ask)));
+}
+
+/*
+ * Post a SYSTR_MSG_RES message: the same payload as an ASK message plus
+ * the system call's error value and its two return registers.
+ * Returns whatever systrace_make_msg() returns.
+ */
+int
+systrace_msg_result(struct fsystrace *fst, struct str_process *strp,
+    int error, int code, size_t argsize, register_t args[], register_t rval[])
+{
+	struct str_msg_ask msg_ask;
+	int i;
+
+	/* See systrace_msg_ask(): no uninitialised bytes may be sent. */
+	memset(&msg_ask, 0, sizeof(msg_ask));
+
+	msg_ask.code = code;
+	msg_ask.argsize = argsize;
+	msg_ask.result = error;
+	for (i = 0; i < (argsize/sizeof(register_t)) && i < SYSTR_MAXARGS; i++)
+		msg_ask.args[i] = args[i];
+
+	msg_ask.rval[0] = rval[0];
+	msg_ask.rval[1] = rval[1];
+
+	return (systrace_make_msg(strp, SYSTR_MSG_RES,
+	    &msg_ask, sizeof(msg_ask)));
+}
+
+/*
+ * Post a SYSTR_MSG_EMUL message.  The emulation name is the fixed,
+ * NUL-padded string "darwin", SYSTR_EMULEN bytes long.
+ */
+int
+systrace_msg_emul(struct fsystrace *fst, struct str_process *strp)
+{
+	struct str_msg_emul msg_emul;
+
+	memcpy(msg_emul.emul, "darwin\0\0", SYSTR_EMULEN);
+
+	return (systrace_make_msg(strp, SYSTR_MSG_EMUL, &msg_emul,
+	    sizeof(msg_emul)));
+}
+
+/*
+ * Post a SYSTR_MSG_UGID message carrying the real uid and gid of the
+ * traced process.
+ */
+int
+systrace_msg_ugid(struct fsystrace *fst, struct str_process *strp)
+{
+	struct str_msg_ugid msg_ugid;
+	struct proc *p = strp->proc;
+
+	msg_ugid.uid = p->p_cred->p_ruid;
+	msg_ugid.gid = p->p_cred->p_rgid;
+
+	return (systrace_make_msg(strp, SYSTR_MSG_UGID,
+	    &msg_ugid, sizeof(msg_ugid)));
+}
+
+int 
+systrace_make_msg(struct str_process *strp, int type, void *data, size_t len) +{ + struct str_message *msg = &strp->msg; + struct fsystrace *fst = strp->parent; + struct proc *p = strp->proc; + int st, again; + + do { + again = 0; + if (ISSET(strp->flags, STR_PROC_ONQUEUE)) { + /* We need to wait before we can post this message. + * Multi-threading causes this issue. + */ + again = 1; + goto out; + } + + memcpy(&msg->msg_data, data, len); + msg->msg_seqnr = ++strp->seqnr; + msg->msg_type = type; + msg->msg_pid = strp->pid; + if (strp->policy) + msg->msg_policy = strp->policy->nr; + else + msg->msg_policy = -1; + + TAILQ_INSERT_TAIL(&fst->messages, strp, msg_next); + SET(strp->flags, STR_PROC_ONQUEUE); + + out: + SET(strp->flags, STR_PROC_WAITANSWER); + systrace_wakeup(fst); + + while (1) { + /* Release the lock - XXX */ + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + st = tsleep(strp, PWAIT | PCATCH, "systrmsg", 0); + if (st != 0) + return (EINTR); + + systrace_lock(); + + /* If we detach, then everything is permitted */ + if ((strp = p->p_systrace) == NULL) { + systrace_unlock(); + return (0); + } + fst = strp->parent; + lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p); + systrace_unlock(); + + if (!ISSET(strp->flags, STR_PROC_WAITANSWER)) + break; + } + + } while (again); + + lockmgr(&fst->lock, LK_RELEASE, NULL, p); + + return (0); +} + +int +systrace_msg_child(struct fsystrace *fst, struct str_process *strp, pid_t npid) +{ + struct str_process *nstrp; + struct str_message *msg; + struct str_msg_child *msg_child; + + MALLOC(nstrp, struct str_process *, sizeof(struct str_process), M_TEMP, + M_WAITOK); + memset(nstrp, 0, sizeof(struct str_process)); + + DPRINTF(("%s: %p: pid %d -> pid %d\n", __func__, + nstrp, strp->pid, npid)); + + msg = &nstrp->msg; + msg_child = &msg->msg_data.msg_child; + + msg->msg_type = SYSTR_MSG_CHILD; + msg->msg_pid = strp->pid; + if (strp->policy) + msg->msg_policy = strp->policy->nr; + else + msg->msg_policy = -1; + msg_child->new_pid = npid; 
+ + TAILQ_INSERT_TAIL(&fst->messages, nstrp, msg_next); + + systrace_wakeup(fst); + + return (0); +}
+
+/* Simple allocator for the stackgap, adapted from NetBSD */
+
+/*
+ * Return the aligned start address of a gap of sz bytes that ends at
+ * p->user_stack.  A size of 0 selects STACKGAPLEN; asking for more
+ * than STACKGAPLEN panics.
+ */
+caddr_t
+stackgap_init(const struct proc *p, size_t sz)
+{
+	/* XXX - i386 might have sigcode on stack */
+	const unsigned long sigcode_len = 0;
+	unsigned long base;
+
+	if (sz == 0)
+		sz = STACKGAPLEN;
+	if (sz > STACKGAPLEN)
+		panic("size %lu > STACKGAPLEN", (unsigned long)sz);
+
+	base = (unsigned long)p->user_stack - sigcode_len - sz;
+	return (caddr_t)(base & ~ALIGNBYTES);
+}
+
+
+/*
+ * Carve ALIGN(sz) bytes out of the gap.  *sgp is the current allocation
+ * cursor; on success the old cursor is returned and *sgp is advanced.
+ * Returns NULL, leaving *sgp untouched, when the request would extend
+ * past p->user_stack.
+ */
+void *
+stackgap_alloc(const struct proc *p, caddr_t *sgp, size_t sz)
+{
+	caddr_t start = *sgp;
+	caddr_t limit = (caddr_t)p->user_stack;	/* no sigcode reserved */
+	size_t need = ALIGN(sz);
+	caddr_t end = start + need;
+
+	if (end > limit)
+		return NULL;
+
+	*sgp = end;
+	return (void *)start;
+}
+
+/* User Address Space memory functions */ + +static int +systrace_rmem(p, uio) + struct proc *p; + struct uio *uio; +{ + struct proc *curproc = systrace_curproc(); + vm_offset_t kv_start, copy_end; + vm_map_t proc_map; + struct task * task; + kern_return_t ret; + int error = 0; + + if (uio->uio_rw == UIO_WRITE) + return (EINVAL); + + if ((p->p_ucred->cr_uid != curproc->p_ucred->cr_uid) + && suser(curproc->p_ucred, &curproc->p_acflag)) + return (EPERM); + + task = p->task; + if (task == NULL) + return (EINVAL); + + if (!task_reference_try(task)) + return (EINVAL); + + proc_map = get_task_map(task); + + /* Allocate a single page */ + ret = kmem_alloc(kernel_map, &kv_start, PAGE_SIZE); + if (ret != KERN_SUCCESS) { + task_deallocate(task); + return(ENOMEM); + } + /* + * Only map in one page at a time. We don't have to, but it + * makes things easier. This way is trivial - right? 
+ */ + do { + int needed = 0; + vm_map_copy_t tmp; + vm_offset_t arg_addr; + vm_size_t arg_size; + caddr_t data; + unsigned size; + vm_offset_t dealloc_start; /* area to remove from kernel map */ + vm_offset_t dealloc_end; + int *ip; + kern_return_t ret; + + vm_offset_t kva; + vm_offset_t uva; + int page_offset; /* offset into page */ + vm_offset_t pageno; /* page number */ + vm_offset_t off; + u_int len; + + uva = (vm_offset_t) uio->uio_offset; + if ((caddr_t)uva > p->user_stack) { + error = 0; + break; + } + + /* + * Get the page number of this segment. + */ + pageno = trunc_page(uva); + page_offset = uva - pageno; + + if(vm_map_copyin(proc_map, pageno, PAGE_SIZE, FALSE, &tmp) + != KERN_SUCCESS) { + error = EIO; + break; + } + + if(vm_map_copy_overwrite(kernel_map, kv_start, tmp, FALSE) + != KERN_SUCCESS) { + error = EIO; + break; + } + + /* + * How many bytes to copy + */ + len = min(PAGE_SIZE - page_offset, uio->uio_resid); + + /* + * Now do the i/o move. + */ + error = uiomove((caddr_t)kv_start + page_offset, len, uio); + + + } while (error == 0 && uio->uio_resid > 0); + + task_deallocate(task); + kmem_free(kernel_map, kv_start, PAGE_SIZE); + + return (error); +} + +/* + * Copy data in and out of the target process. + * We do this by mapping the process's page into + * the kernel and then doing a uiomove direct + * from the kernel address space. + */ +int +systrace_domem(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + + if (uio->uio_resid == 0) + return (0); + + return (systrace_rmem(p, uio)); +} --- /dev/null Tue Dec 3 14:27:21 2002 +++ bsd/sys/systrace.h Tue Dec 3 12:39:54 2002 @@ -0,0 +1,195 @@ +/* $OpenBSD: systrace.h,v 1.11 2002/10/25 23:22:58 fgsch Exp $ */ +/* + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SYSTRACE_H_ +#define _SYSTRACE_H_ + +#include + +#define SYSTR_CLONE _IOR('s', 1, int) + +#define SYSTR_EMULEN 8 /* sync with sys proc */ + +struct str_msg_emul { + char emul[SYSTR_EMULEN]; +}; + +struct str_msg_ugid { + uid_t uid; + gid_t gid; +}; + +#define SYSTR_MAX_POLICIES 64 +#define SYSTR_MAXARGS 64 + +struct str_msg_ask { + int code; + int argsize; + register_t args[SYSTR_MAXARGS]; + register_t rval[2]; + int result; +}; + +/* Queued on fork or exit of a process */ + +struct str_msg_child { + pid_t new_pid; +}; + +#define SYSTR_MSG_ASK 1 +#define SYSTR_MSG_RES 2 +#define SYSTR_MSG_EMUL 3 +#define SYSTR_MSG_CHILD 4 +#define SYSTR_MSG_UGID 5 + +#define SYSTR_MSG_NOPROCESS(x) \ + ((x)->msg.msg_type == SYSTR_MSG_CHILD) + +struct str_message { + int msg_type; + pid_t msg_pid; + u_int16_t msg_seqnr; /* answer has to match seqnr */ + short msg_policy; + union { + struct str_msg_emul msg_emul; + struct str_msg_ugid msg_ugid; + struct str_msg_ask msg_ask; + struct str_msg_child msg_child; + } msg_data; +}; + +struct systrace_answer { + pid_t stra_pid; + u_int16_t stra_seqnr; + short reserved; + uid_t stra_seteuid; /* elevated privileges for system call */ + uid_t stra_setegid; + int stra_policy; + int stra_error; + int stra_flags; +}; + +#define SYSTR_READ 1 +#define SYSTR_WRITE 2 + +struct systrace_io { + pid_t strio_pid; + int strio_op; + void *strio_offs; + void *strio_addr; + size_t strio_len; +}; + +#define SYSTR_POLICY_NEW 1 +#define SYSTR_POLICY_ASSIGN 2 +#define SYSTR_POLICY_MODIFY 3 + +struct systrace_policy { + int strp_op; + int strp_num; + union { + struct { + short code; + short policy; + } assign; + pid_t pid; + int maxents; + } strp_data; +}; + +#define strp_pid strp_data.pid +#define strp_maxents strp_data.maxents +#define strp_code strp_data.assign.code +#define strp_policy strp_data.assign.policy + +struct systrace_replace { + pid_t strr_pid; + int strr_nrepl; + caddr_t strr_base; /* Base memory */ + size_t strr_len; /* Length of 
memory */ + int strr_argind[SYSTR_MAXARGS]; + size_t strr_off[SYSTR_MAXARGS]; + size_t strr_offlen[SYSTR_MAXARGS]; +}; + +#define STRIOCATTACH _IOW('s', 101, pid_t) +#define STRIOCDETACH _IOW('s', 102, pid_t) +#define STRIOCANSWER _IOW('s', 103, struct systrace_answer) +#define STRIOCIO _IOWR('s', 104, struct systrace_io) +#define STRIOCPOLICY _IOWR('s', 105, struct systrace_policy) +#define STRIOCGETCWD _IOW('s', 106, pid_t) +#define STRIOCRESCWD _IO('s', 107) +#define STRIOCREPORT _IOW('s', 108, pid_t) +#define STRIOCREPLACE _IOW('s', 109, struct systrace_replace) + +#define SYSTR_POLICY_ASK 0 +#define SYSTR_POLICY_PERMIT 1 +#define SYSTR_POLICY_NEVER 2 + +#define SYSTR_FLAGS_RESULT 0x001 +#define SYSTR_FLAGS_SETEUID 0x002 +#define SYSTR_FLAGS_SETEGID 0x004 + +#ifdef KERNEL +struct str_process; +struct fsystrace { + struct lock__bsd__ lock; + struct selinfo si; + + TAILQ_HEAD(strprocessq, str_process) processes; + int nprocesses; + + TAILQ_HEAD(strpolicyq, str_policy) policies; + + struct strprocessq messages; + + int npolicynr; + int npolicies; + + int issuser; + uid_t p_ruid; + gid_t p_rgid; + + /* cwd magic */ + pid_t fd_pid; + struct vnode *fd_cdir; + struct vnode *fd_rdir; +}; + +/* Internal prototypes */ + +int systrace_enter(struct proc *, register_t, register_t []); +void systrace_exit(struct proc *, register_t, register_t [], register_t [], int); +void systrace_sys_exit(struct proc *); +void systrace_sys_fork(struct proc *, struct proc *); + +#endif /* KERNEL */ +#endif /* _SYSTRACE_H_ */ --- /dev/null Tue Dec 3 14:27:21 2002 +++ bsd/sys/tree.h Tue Nov 26 21:43:14 2002 @@ -0,0 +1,678 @@ +/* $NetBSD: tree.h,v 1.5 2002/11/02 07:35:07 perry Exp $ */ +/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ +/* + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_TREE_H_ +#define _SYS_TREE_H_ + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. 
It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). + */ + +#define SPLAY_HEAD(name, type) \ +struct name { \ + struct type *sph_root; /* root of the tree */ \ +} + +#define SPLAY_INITIALIZER(root) \ + { NULL } + +#define SPLAY_INIT(root) do { \ + (root)->sph_root = NULL; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ENTRY(type) \ +struct { \ + struct type *spe_left; /* left element */ \ + struct type *spe_right; /* right element */ \ +} + +#define SPLAY_LEFT(elm, field) (elm)->field.spe_left +#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right +#define SPLAY_ROOT(head) (head)->sph_root +#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) + +/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ +#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKLEFT(head, tmp, field) do { \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKRIGHT(head, tmp, field) do { \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ + SPLAY_RIGHT(left, field) = 
SPLAY_LEFT((head)->sph_root, field); \ + SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ +} while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ + +#define SPLAY_PROTOTYPE(name, type, field, cmp) \ +void name##_SPLAY(struct name *, struct type *); \ +void name##_SPLAY_MINMAX(struct name *, int); \ +struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ +struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ + \ +/* Finds the node with the same key as elm */ \ +static __inline struct type * \ +name##_SPLAY_FIND(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) \ + return(NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) \ + return (head->sph_root); \ + return (NULL); \ +} \ + \ +static __inline struct type * \ +name##_SPLAY_NEXT(struct name *head, struct type *elm) \ +{ \ + name##_SPLAY(head, elm); \ + if (SPLAY_RIGHT(elm, field) != NULL) { \ + elm = SPLAY_RIGHT(elm, field); \ + while (SPLAY_LEFT(elm, field) != NULL) { \ + elm = SPLAY_LEFT(elm, field); \ + } \ + } else \ + elm = NULL; \ + return (elm); \ +} \ + \ +static __inline struct type * \ +name##_SPLAY_MIN_MAX(struct name *head, int val) \ +{ \ + name##_SPLAY_MINMAX(head, val); \ + return (SPLAY_ROOT(head)); \ +} + +/* Main splay operation. 
+ * Moves node close to the key of elm to top + */ +#define SPLAY_GENERATE(name, type, field, cmp) \ +struct type * \ +name##_SPLAY_INSERT(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) { \ + SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ + } else { \ + int __comp; \ + name##_SPLAY(head, elm); \ + __comp = (cmp)(elm, (head)->sph_root); \ + if(__comp < 0) { \ + SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ + SPLAY_RIGHT(elm, field) = (head)->sph_root; \ + SPLAY_LEFT((head)->sph_root, field) = NULL; \ + } else if (__comp > 0) { \ + SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT(elm, field) = (head)->sph_root; \ + SPLAY_RIGHT((head)->sph_root, field) = NULL; \ + } else \ + return ((head)->sph_root); \ + } \ + (head)->sph_root = (elm); \ + return (NULL); \ +} \ + \ +struct type * \ +name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *__tmp; \ + if (SPLAY_EMPTY(head)) \ + return (NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) { \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ + } else { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ + name##_SPLAY(head, elm); \ + SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ + } \ + return (elm); \ + } \ + return (NULL); \ +} \ + \ +void \ +name##_SPLAY(struct name *head, struct type *elm) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ + int __comp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while ((__comp = (cmp)(elm, (head)->sph_root))) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + 
SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) > 0){ \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} \ + \ +/* Splay with either the minimum or the maximum element \ + * Used to find minimum or maximum element in tree. \ + */ \ +void name##_SPLAY_MINMAX(struct name *head, int __comp) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while (1) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp > 0) { \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} + +#define SPLAY_NEGINF -1 +#define SPLAY_INF 1 + +#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) +#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) +#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) +#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) +#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) +#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ?
NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) + +#define SPLAY_FOREACH(x, name, head) \ + for ((x) = SPLAY_MIN(name, head); \ + (x) != NULL; \ + (x) = SPLAY_NEXT(name, head, x)) + +/* Macros that define a red-black tree */ +#define RB_HEAD(name, type) \ +struct name { \ + struct type *rbh_root; /* root of the tree */ \ +} + +#define RB_INITIALIZER(root) \ + { NULL } + +#define RB_INIT(root) do { \ + (root)->rbh_root = NULL; \ +} while (/*CONSTCOND*/ 0) + +#define RB_BLACK 0 +#define RB_RED 1 +#define RB_ENTRY(type) \ +struct { \ + struct type *rbe_left; /* left element */ \ + struct type *rbe_right; /* right element */ \ + struct type *rbe_parent; /* parent element */ \ + int rbe_color; /* node color */ \ +} + +#define RB_LEFT(elm, field) (elm)->field.rbe_left +#define RB_RIGHT(elm, field) (elm)->field.rbe_right +#define RB_PARENT(elm, field) (elm)->field.rbe_parent +#define RB_COLOR(elm, field) (elm)->field.rbe_color +#define RB_ROOT(head) (head)->rbh_root +#define RB_EMPTY(head) (RB_ROOT(head) == NULL) + +#define RB_SET(elm, parent, field) do { \ + RB_PARENT(elm, field) = parent; \ + RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ + RB_COLOR(elm, field) = RB_RED; \ +} while (/*CONSTCOND*/ 0) + +#define RB_SET_BLACKRED(black, red, field) do { \ + RB_COLOR(black, field) = RB_BLACK; \ + RB_COLOR(red, field) = RB_RED; \ +} while (/*CONSTCOND*/ 0) + +#ifndef RB_AUGMENT +#define RB_AUGMENT(x) +#endif + +#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \ + (tmp) = RB_RIGHT(elm, field); \ + if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \ + RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_LEFT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ +
RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ +} while (/*CONSTCOND*/ 0) + +#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \ + (tmp) = RB_LEFT(elm, field); \ + if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \ + RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_RIGHT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ + RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ +} while (/*CONSTCOND*/ 0) + +/* Generates prototypes for the red-black tree functions */ +#define RB_PROTOTYPE(name, type, field, cmp) \ +void name##_RB_INSERT_COLOR(struct name *, struct type *); \ +void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\ +struct type *name##_RB_REMOVE(struct name *, struct type *); \ +struct type *name##_RB_INSERT(struct name *, struct type *); \ +struct type *name##_RB_FIND(struct name *, struct type *); \ +struct type *name##_RB_NEXT(struct name *, struct type *); \ +struct type *name##_RB_MINMAX(struct name *, int); \ + \ + +/* Generates the red-black tree functions.
+ * Emits the bodies of the functions declared by RB_PROTOTYPE + */ +#define RB_GENERATE(name, type, field, cmp) \ +void \ +name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ +{ \ + struct type *parent, *gparent, *tmp; \ + while ((parent = RB_PARENT(elm, field)) && \ + RB_COLOR(parent, field) == RB_RED) { \ + gparent = RB_PARENT(parent, field); \ + if (parent == RB_LEFT(gparent, field)) { \ + tmp = RB_RIGHT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field);\ + elm = gparent; \ + continue; \ + } \ + if (RB_RIGHT(parent, field) == elm) { \ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \ + } else { \ + tmp = RB_LEFT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field);\ + elm = gparent; \ + continue; \ + } \ + if (RB_LEFT(parent, field) == elm) { \ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_LEFT(head, gparent, tmp, field); \ + } \ + } \ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \ +} \ +/* Recolors and rotates after a removal; name##_RB_REMOVE calls this when the spliced-out node was black */ \ +void \ +name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ +{ \ + struct type *tmp; \ + while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \ + elm != RB_ROOT(head)) { \ + if (RB_LEFT(parent, field) == elm) { \ + tmp = RB_RIGHT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + tmp = RB_RIGHT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ +
RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\ + struct type *oleft; \ + if ((oleft = RB_LEFT(tmp, field)))\ + RB_COLOR(oleft, field) = RB_BLACK;\ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_RIGHT(head, tmp, oleft, field);\ + tmp = RB_RIGHT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_RIGHT(tmp, field)) \ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + elm = RB_ROOT(head); \ + break; \ + } \ + } else { \ + tmp = RB_LEFT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + tmp = RB_LEFT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\ + struct type *oright; \ + if ((oright = RB_RIGHT(tmp, field)))\ + RB_COLOR(oright, field) = RB_BLACK;\ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_LEFT(head, tmp, oright, field);\ + tmp = RB_LEFT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_LEFT(tmp, field)) \ + RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + } \ + if (elm) \ + RB_COLOR(elm, field) = RB_BLACK; \ +} \ +/* Unlinks elm from the tree and returns it */ \ +struct type * \ +name##_RB_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *child, *parent, *old = elm; \ + int color; \ + if (RB_LEFT(elm, field)
== NULL) \ + child = RB_RIGHT(elm, field); \ + else if (RB_RIGHT(elm, field) == NULL) \ + child = RB_LEFT(elm, field); \ + else { \ + struct type *left; \ + elm = RB_RIGHT(elm, field); \ + while ((left = RB_LEFT(elm, field))) \ + elm = left; \ + child = RB_RIGHT(elm, field); \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ + if (RB_PARENT(elm, field) == old) \ + parent = elm; \ + (elm)->field = (old)->field; \ + if (RB_PARENT(old, field)) { \ + if (RB_LEFT(RB_PARENT(old, field), field) == old)\ + RB_LEFT(RB_PARENT(old, field), field) = elm;\ + else \ + RB_RIGHT(RB_PARENT(old, field), field) = elm;\ + RB_AUGMENT(RB_PARENT(old, field)); \ + } else \ + RB_ROOT(head) = elm; \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + if (RB_RIGHT(old, field)) \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + if (parent) { \ + left = parent; \ + do { \ + RB_AUGMENT(left); \ + } while ((left = RB_PARENT(left, field))); \ + } \ + goto color; \ + } \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ +color: \ + if (color == RB_BLACK) \ + name##_RB_REMOVE_COLOR(head, parent, child); \ + return (old); \ +} \ + \ +/* Inserts a node into the RB tree; returns NULL on success, or the existing node that compares equal to elm */ \ +struct type * \ +name##_RB_INSERT(struct name *head, struct type *elm) \ +{ \ + struct type *tmp; \ + struct type *parent = NULL; \ + int comp = 0; \ + tmp = RB_ROOT(head); \ + while (tmp) { \ + parent = tmp; \ + comp = (cmp)(elm, parent); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + RB_SET(elm, parent, field); \ + if (parent != NULL) { \ + if (comp < 0) \ + RB_LEFT(parent, field) = elm; \ + else \ + RB_RIGHT(parent, field) = elm; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = elm; \ + name##_RB_INSERT_COLOR(head, elm); \ + return (NULL); \ +} \ + \ +/* Finds the node with the same key as elm; returns NULL if there is none */ \ +struct type * \ +name##_RB_FIND(struct name *head, struct type *elm) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + int comp; \ + while (tmp) { \ + comp = cmp(elm, tmp); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ +} \ +/* Returns the in-order successor of elm, or NULL if elm is the last node; head is not used */ \ +struct type * \ +name##_RB_NEXT(struct name *head, struct type *elm) \ +{ \ + if (RB_RIGHT(elm, field)) { \ + elm = RB_RIGHT(elm, field); \ + while (RB_LEFT(elm, field)) \ + elm = RB_LEFT(elm, field); \ + } else { \ + if (RB_PARENT(elm, field) && \ + (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else { \ + while (RB_PARENT(elm, field) && \ + (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ +} \ +/* Returns the leftmost node if val < 0, otherwise the rightmost; NULL if the tree is empty */ \ +struct type * \ +name##_RB_MINMAX(struct name *head, int val) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + struct type *parent = NULL; \ + while (tmp) { \ + parent = tmp; \ + if (val < 0) \ + tmp = RB_LEFT(tmp, field); \ + else \ + tmp = RB_RIGHT(tmp, field); \ + } \ + return (parent); \ +} + +#define RB_NEGINF -1 +#define RB_INF 1 + +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(x, y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) + +#define RB_FOREACH(x, name, head) \ + for
((x) = RB_MIN(name, head); \ + (x) != NULL; \ + (x) = name##_RB_NEXT(head, x)) + +#endif /* _SYS_TREE_H_ */