Index: linux-2.5/arch/i386/kernel/entry.S
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/kernel/entry.S,v
retrieving revision 1.86
diff -u -p -r1.86 entry.S
--- linux-2.5/arch/i386/kernel/entry.S	23 May 2004 05:03:15 -0000	1.86
+++ linux-2.5/arch/i386/kernel/entry.S	13 Jul 2004 02:36:04 -0000
@@ -157,12 +157,19 @@ do_lcall:
 	movl %edx,EIP(%ebp)	# Now we move them to their "normal" places
 	movl %ecx,CS(%ebp)	#
 	GET_THREAD_INFO_WITH_ESP(%ebp)	# GET_THREAD_INFO
+	/* call gates cannot run with SECCOMP enabled */
+	testw $_TIF_SECCOMP,TI_flags(%ebp)	/* word-sized test; NOTE(review): presumably because TIF_SECCOMP is bit 8, outside the low byte -- confirm */
+	jnz sigkill	/* flag set: do not call the exec domain handler */
 	movl TI_exec_domain(%ebp), %edx	# Get the execution domain
 	call *EXEC_DOMAIN_handler(%edx)	# Call the handler for the domain
 	addl $4, %esp
 	popl %eax
 	jmp resume_userspace
 
+sigkill:
+	pushl $9	/* argument for do_exit; NOTE(review): 9 is presumably SIGKILL, as the label suggests -- confirm */
+	call do_exit	/* no instruction follows, so do_exit is expected not to return here */
+
 ENTRY(lcall27)
 	pushfl			# We get a different stack layout with call
 				# gates, which has to be cleaned up later..
@@ -258,7 +265,7 @@ sysenter_past_esp:
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)	/* testw instead of testb: the mask now also contains _TIF_SECCOMP */
 	jnz syscall_trace_entry
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
@@ -281,7 +288,7 @@ ENTRY(system_call)
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 					# system call tracing in operation
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)	/* same widened test as in the sysenter path */
 	jnz syscall_trace_entry
 syscall_call:
 	call *sys_call_table(,%eax,4)
Index: linux-2.5/arch/i386/kernel/ptrace.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/kernel/ptrace.c,v
retrieving revision 1.24
diff -u -p -r1.24 ptrace.c
--- linux-2.5/arch/i386/kernel/ptrace.c	31 May 2004 06:17:04 -0000	1.24
+++ linux-2.5/arch/i386/kernel/ptrace.c	13 Jul 2004 02:33:12 -0000
@@ -15,6 +15,7 @@
 #include
 #include
#include
+#include <linux/seccomp.h>
 
 #include
 #include
@@ -526,6 +527,10 @@ out:
 __attribute__((regparm(3)))
 void do_syscall_trace(struct pt_regs *regs, int entryexit)
 {
+	/* do the secure computing check first */
+	if (unlikely(test_thread_flag(TIF_SECCOMP)))
+		secure_computing(regs->orig_eax);
+
 	if (unlikely(current->audit_context)) {
 		if (!entryexit)
 			audit_syscall_entry(current, regs->orig_eax,
Index: linux-2.5/fs/proc/base.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/fs/proc/base.c,v
retrieving revision 1.73
diff -u -p -r1.73 base.c
--- linux-2.5/fs/proc/base.c	22 Jun 2004 15:27:21 -0000	1.73
+++ linux-2.5/fs/proc/base.c	13 Jul 2004 02:33:12 -0000
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include <linux/seccomp.h>
 
 /*
  * For hysterical raisins we keep the same inumbers as in the old procfs.
@@ -48,6 +49,7 @@ enum pid_directory_inos {
 	PROC_TGID_TASK,
 	PROC_TGID_STATUS,
 	PROC_TGID_MEM,
+	PROC_TGID_SECCOMP,
 	PROC_TGID_CWD,
 	PROC_TGID_ROOT,
 	PROC_TGID_EXE,
@@ -71,6 +73,7 @@ enum pid_directory_inos {
 	PROC_TID_INO,
 	PROC_TID_STATUS,
 	PROC_TID_MEM,
+	PROC_TID_SECCOMP,
 	PROC_TID_CWD,
 	PROC_TID_ROOT,
 	PROC_TID_EXE,
@@ -113,6 +116,7 @@ static struct pid_entry tgid_base_stuff[
 	E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
 	E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
 	E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
+	E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
 	E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
@@ -135,6 +139,7 @@ static struct pid_entry tid_base_stuff[]
 	E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
 	E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
 	E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
+	E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
 	E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
@@ -689,6 +694,77 @@ static struct inode_operations proc_mem_
 	.permission	= proc_permission,
 };
 
+/*
+ * Read handler for the per-task "seccomp" proc file: reports
+ * tsk->seccomp_mode as an unsigned decimal number plus a newline.
+ * Returns the number of bytes copied, 0 once *ppos is at or past the
+ * end of that text, or -EFAULT if the user buffer cannot be written.
+ */
+static ssize_t seccomp_read(struct file * file, char * buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct * tsk = proc_task(file->f_dentry->d_inode);
+	char __buf[20];
+	loff_t __ppos = *ppos;
+	size_t len;
+
+	/* no need to hand out the trailing NUL, so use only sprintf's length */
+	len = sprintf(__buf, "%u\n", tsk->seccomp_mode);
+	if (__ppos >= len)
+		return 0;
+	if (count > len-__ppos)
+		count = len-__ppos;
+	if (copy_to_user(buf, __buf + __ppos, count))
+		return -EFAULT;
+	*ppos += count;
+	return count;
+}
+
+/*
+ * Write handler for the per-task "seccomp" proc file: parses a mode
+ * number from the user buffer and, when it names a supported mode,
+ * stores it in tsk->seccomp_mode and sets TIF_SECCOMP on the task.
+ *
+ * Returns the number of bytes consumed, -EPERM if a mode is already
+ * set, -EFAULT if the user buffer cannot be read, -EIO if no input
+ * was consumed, or -EINVAL if the value is not a supported mode.
+ */
+static ssize_t seccomp_write(struct file * file, const char * buf,
+			     size_t count, loff_t *ppos)
+{
+	struct task_struct * tsk = proc_task(file->f_dentry->d_inode);
+	char __buf[20], * end;
+	unsigned int seccomp_mode;
+
+	/* can set it only once to be even more secure */
+	if (unlikely(tsk->seccomp_mode))
+		return -EPERM;
+
+	/* zero-fill, then copy at most sizeof-1 bytes: stays NUL-terminated */
+	memset(__buf, 0, sizeof(__buf));
+	if (count > sizeof(__buf) - 1)
+		count = sizeof(__buf) - 1;
+	if (copy_from_user(__buf, buf, count))
+		return -EFAULT;
+	seccomp_mode = simple_strtoul(__buf, &end, 0);
+	if (*end == '\n')
+		end++;
+	if (unlikely(end == __buf))
+		return -EIO;
+	/* refuse unsupported modes instead of silently reporting success */
+	if (!seccomp_mode || seccomp_mode > NR_SECCOMP_MODES)
+		return -EINVAL;
+	tsk->seccomp_mode = seccomp_mode;
+	set_tsk_thread_flag(tsk, TIF_SECCOMP);
+	return end - __buf;
+}
+
+/* file operations installed on the "seccomp" proc entries */
+static struct file_operations proc_seccomp_operations = {
+	.read		= seccomp_read,
+	.write		= seccomp_write,
+};
+
 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
@@ -1342,6 +1418,10 @@ static struct dentry *proc_pident_lookup
 			inode->i_op = &proc_mem_inode_operations;
 			inode->i_fop = &proc_mem_operations;
 			break;
+		case PROC_TID_SECCOMP:
+		case PROC_TGID_SECCOMP:
+			inode->i_fop = &proc_seccomp_operations;
+			break;
 		case PROC_TID_MOUNTS:
 		case PROC_TGID_MOUNTS:
 			inode->i_fop = &proc_mounts_operations;
Index: linux-2.5/include/asm-i386/thread_info.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-i386/thread_info.h,v
retrieving revision 1.20
diff -u -p -r1.20 thread_info.h
--- linux-2.5/include/asm-i386/thread_info.h	19 May 2004 23:34:45 -0000	1.20
+++ linux-2.5/include/asm-i386/thread_info.h	13 Jul 2004 02:33:12 -0000
@@ -144,6 +144,7 @@ static inline unsigned long current_stac
 #define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_IRET		5	/* return with iret */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+#define TIF_SECCOMP		8	/* secure computing */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 
 #define _TIF_SYSCALL_TRACE	(1<
+ *
+ * This defines a simple but solid secure-computing mode.
+ */
+
+#include
+#include
+#include
+
+/* #define SECCOMP_DEBUG 1 */
+
+/*
+ * Secure computing mode 1 allows only read/write/exit/sigreturn.
+ * To be fully secure this must be combined with rlimit
+ * to limit the stack allocations too.
+ */
+static int mode1_syscalls[] = {	/* syscall numbers accepted by secure_computing() in mode 1 */
+	__NR_read, __NR_write, __NR_exit,
+	/*
+	 * Allow either sigreturn or rt_sigreturn: newer archs
+	 * like x86-64 define only __NR_rt_sigreturn.
+	 */
+#ifdef __NR_sigreturn
+	__NR_sigreturn,
+#else
+	__NR_rt_sigreturn,
+#endif
+};
+
+void secure_computing(int this_syscall)
+{
+	int mode = current->seccomp_mode;	/* mode of the calling task */
+	int * syscall;
+
+	switch (mode) {
+	case 1:	/* mode 1: scan the table and return as soon as this_syscall matches */
+		for (syscall = mode1_syscalls;
+		     syscall < mode1_syscalls + sizeof(mode1_syscalls)/sizeof(int);
+		     syscall++)
+			if (*syscall == this_syscall)
+				return;
+		break;	/* no match: fall out of the switch to the do_exit() below */
+	default:	/* no other mode value is handled here */
+		BUG();
+	}
+
+#ifdef SECCOMP_DEBUG
+	dump_stack();
+#endif
+	do_exit(SIGKILL);	/* reached only when this_syscall was not found in the table */
+}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.6.7/include/linux/seccomp.h seccomp/include/linux/seccomp.h
--- 2.6.7/include/linux/seccomp.h	1970-01-01 01:00:00.000000000 +0100
+++ seccomp/include/linux/seccomp.h	2004-07-05 02:42:14.764887872 +0200
@@ -0,0 +1,8 @@
+#ifndef _LINUX_SECCOMP_H
+#define _LINUX_SECCOMP_H
+
+#define NR_SECCOMP_MODES 1	/* highest mode number accepted by the proc write handler */
+
+extern void secure_computing(int);	/* argument: the syscall number to check */
+
+#endif /* _LINUX_SECCOMP_H */