forkできない環境をx86_64 linuxで その1

はじめに

YAPC::Asia 2010 Tokyoで子飼さんのInside LLEvalを聞いて,同じような環境を普段使ってるlinux上で作ってみたいなと思いました.FreeBSD::i386::Ptrace - Ptrace for FreeBSD-i386 - metacpan.orgをみてptraceを使えばできそうだと考えましたが,FreeBSDとLinuxでは使い方が違うし,Linuxでもi386とx86_64では違うことに気づいたので,まずは調べたところまでをまとめることにしました.

ptraceでsyscallをリストアップ

いろいろ探した結果,Playing with ptrace, Part I | Linux Journalが参考になることがわかりました.でもこの記事はi386について書いてあってx86_64用のレジスタの情報がよくわかりませんでした.さらに探してtracef - function call tracerというプログラムの中身をみたところ

/*
 * Architecture-specific tracing constants (excerpt from tracef).
 *
 * On i386 and x86_64 this defines the breakpoint opcode and its length,
 * plus three macros that expand to the register names for the stack
 * pointer, program counter and return value.  Any other architecture
 * stops the build with #error.
 */
#if defined(__i386__) || defined(__x86_64__)
  /* One-byte breakpoint instruction (0xCC is the x86 INT3 opcode). */
  #define BREAKPOINT_INSN      0xCC
  #define BREAKPOINT_INSN_LEN     1
  #if defined(__i386__) 
    /* 32-bit register names. */
    #define SP_     esp
    #define PC_     eip
    #define RETVAL_ eax
  #else
    // x86_64: the 64-bit counterparts of the registers above
    #define SP_     rsp
    #define PC_     rip
    #define RETVAL_ rax
  #endif  
#else
  #error unknown arch
#endif

というのを見つけたので,orig_eaxの代わりにorig_raxを使えばよいということがわかりました.最後にx86_64のsyscallのリストを/usr/include/bits/syscall.hに見つけました.というところで動かしてみたサンプルプログラムはこんな感じになります.

#include <sys/ptrace.h>
#include <sys/types.h>
#include <inttypes.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/reg.h>

/*
 * System call names for x86_64 Linux, indexed by system call number.
 *
 * The table covers numbers 0 ("read") through 285 ("fallocate"); the
 * comment at the start of each row gives the number of its first entry.
 * Callers must range-check a number before using it as an index.
 */
const char *name[] = {
  /*   0 */ "read", "write", "open", "close",
  /*   4 */ "stat", "fstat", "lstat", "poll",
  /*   8 */ "lseek", "mmap", "mprotect", "munmap",
  /*  12 */ "brk", "rt_sigaction", "rt_sigprocmask", "rt_sigreturn",
  /*  16 */ "ioctl", "pread64", "pwrite64", "readv",
  /*  20 */ "writev", "access", "pipe", "select",
  /*  24 */ "sched_yield", "mremap", "msync", "mincore",
  /*  28 */ "madvise", "shmget", "shmat", "shmctl",
  /*  32 */ "dup", "dup2", "pause", "nanosleep",
  /*  36 */ "getitimer", "alarm", "setitimer", "getpid",
  /*  40 */ "sendfile", "socket", "connect", "accept",
  /*  44 */ "sendto", "recvfrom", "sendmsg", "recvmsg",
  /*  48 */ "shutdown", "bind", "listen", "getsockname",
  /*  52 */ "getpeername", "socketpair", "setsockopt", "getsockopt",
  /*  56 */ "clone", "fork", "vfork", "execve",
  /*  60 */ "exit", "wait4", "kill", "uname",
  /*  64 */ "semget", "semop", "semctl", "shmdt",
  /*  68 */ "msgget", "msgsnd", "msgrcv", "msgctl",
  /*  72 */ "fcntl", "flock", "fsync", "fdatasync",
  /*  76 */ "truncate", "ftruncate", "getdents", "getcwd",
  /*  80 */ "chdir", "fchdir", "rename", "mkdir",
  /*  84 */ "rmdir", "creat", "link", "unlink",
  /*  88 */ "symlink", "readlink", "chmod", "fchmod",
  /*  92 */ "chown", "fchown", "lchown", "umask",
  /*  96 */ "gettimeofday", "getrlimit", "getrusage", "sysinfo",
  /* 100 */ "times", "ptrace", "getuid", "syslog",
  /* 104 */ "getgid", "setuid", "setgid", "geteuid",
  /* 108 */ "getegid", "setpgid", "getppid", "getpgrp",
  /* 112 */ "setsid", "setreuid", "setregid", "getgroups",
  /* 116 */ "setgroups", "setresuid", "getresuid", "setresgid",
  /* 120 */ "getresgid", "getpgid", "setfsuid", "setfsgid",
  /* 124 */ "getsid", "capget", "capset", "rt_sigpending",
  /* 128 */ "rt_sigtimedwait", "rt_sigqueueinfo", "rt_sigsuspend", "sigaltstack",
  /* 132 */ "utime", "mknod", "uselib", "personality",
  /* 136 */ "ustat", "statfs", "fstatfs", "sysfs",
  /* 140 */ "getpriority", "setpriority", "sched_setparam", "sched_getparam",
  /* 144 */ "sched_setscheduler", "sched_getscheduler", "sched_get_priority_max", "sched_get_priority_min",
  /* 148 */ "sched_rr_get_interval", "mlock", "munlock", "mlockall",
  /* 152 */ "munlockall", "vhangup", "modify_ldt", "pivot_root",
  /* 156 */ "_sysctl", "prctl", "arch_prctl", "adjtimex",
  /* 160 */ "setrlimit", "chroot", "sync", "acct",
  /* 164 */ "settimeofday", "mount", "umount2", "swapon",
  /* 168 */ "swapoff", "reboot", "sethostname", "setdomainname",
  /* 172 */ "iopl", "ioperm", "create_module", "init_module",
  /* 176 */ "delete_module", "get_kernel_syms", "query_module", "quotactl",
  /* 180 */ "nfsservctl", "getpmsg", "putpmsg", "afs_syscall",
  /* 184 */ "tuxcall", "security", "gettid", "readahead",
  /* 188 */ "setxattr", "lsetxattr", "fsetxattr", "getxattr",
  /* 192 */ "lgetxattr", "fgetxattr", "listxattr", "llistxattr",
  /* 196 */ "flistxattr", "removexattr", "lremovexattr", "fremovexattr",
  /* 200 */ "tkill", "time", "futex", "sched_setaffinity",
  /* 204 */ "sched_getaffinity", "set_thread_area", "io_setup", "io_destroy",
  /* 208 */ "io_getevents", "io_submit", "io_cancel", "get_thread_area",
  /* 212 */ "lookup_dcookie", "epoll_create", "epoll_ctl_old", "epoll_wait_old",
  /* 216 */ "remap_file_pages", "getdents64", "set_tid_address", "restart_syscall",
  /* 220 */ "semtimedop", "fadvise64", "timer_create", "timer_settime",
  /* 224 */ "timer_gettime", "timer_getoverrun", "timer_delete", "clock_settime",
  /* 228 */ "clock_gettime", "clock_getres", "clock_nanosleep", "exit_group",
  /* 232 */ "epoll_wait", "epoll_ctl", "tgkill", "utimes",
  /* 236 */ "vserver", "mbind", "set_mempolicy", "get_mempolicy",
  /* 240 */ "mq_open", "mq_unlink", "mq_timedsend", "mq_timedreceive",
  /* 244 */ "mq_notify", "mq_getsetattr", "kexec_load", "waitid",
  /* 248 */ "add_key", "request_key", "keyctl", "ioprio_set",
  /* 252 */ "ioprio_get", "inotify_init", "inotify_add_watch", "inotify_rm_watch",
  /* 256 */ "migrate_pages", "openat", "mkdirat", "mknodat",
  /* 260 */ "fchownat", "futimesat", "newfstatat", "unlinkat",
  /* 264 */ "renameat", "linkat", "symlinkat", "readlinkat",
  /* 268 */ "fchmodat", "faccessat", "pselect6", "ppoll",
  /* 272 */ "unshare", "set_robust_list", "get_robust_list", "splice",
  /* 276 */ "tee", "sync_file_range", "vmsplice", "move_pages",
  /* 280 */ "utimensat", "epoll_pwait", "signalfd", "timerfd_create",
  /* 284 */ "eventfd", "fallocate",
};

/*
 * Run /bin/ls under ptrace and print the name of every system call it makes.
 *
 * The parent resumes the child with PTRACE_SYSCALL, which stops the child at
 * both the entry to and the exit from each system call, so every call is
 * reported twice.  At each stop the system call number is read from the
 * child's saved orig_rax register and translated through name[].
 *
 * Returns 0 once the child has exited or has been killed by a signal, and 1
 * if fork(), waitpid() or ptrace() fails in the parent.
 */
int main(void)
{
  const size_t name_count = sizeof(name) / sizeof(name[0]);

  pid_t child = fork();
  if (child == -1) {
    perror("fork");
    return 1;
  }

  if (child == 0) {
    /* Child: ask to be traced by the parent, then become /bin/ls. */
    if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) {
      perror("ptrace(PTRACE_TRACEME)");
      _exit(127);
    }
    /* The variadic terminator must be a real pointer, hence the cast. */
    execl("/bin/ls", "ls", (char *)NULL);
    /* execl only returns on failure. */
    perror("execl");
    _exit(127);
  }

  /* Parent: report one line per stop until the child is gone. */
  for (;;) {
    int status;

    if (waitpid(child, &status, 0) == -1) {
      perror("waitpid");
      return 1;
    }
    /* A child killed by a signal never reports WIFEXITED; stop on both. */
    if (WIFEXITED(status) || WIFSIGNALED(status))
      break;

    /*
     * ORIG_RAX is a register index; the offset into the user area is that
     * index times the 8-byte register size.
     * NOTE(review): stops caused by signals are not distinguished from
     * system call stops here; consider PTRACE_O_TRACESYSGOOD if that matters.
     */
    long nr = ptrace(PTRACE_PEEKUSER, child,
                     (void *)(uintptr_t)(8 * ORIG_RAX), NULL);

    /* name[] only covers a fixed range; never index it unchecked. */
    if (nr >= 0 && (size_t)nr < name_count)
      printf("The child made a system call %s\n", name[nr]);
    else
      printf("The child made a system call unknown(%ld)\n", nr);

    if (ptrace(PTRACE_SYSCALL, child, NULL, NULL) == -1) {
      perror("ptrace(PTRACE_SYSCALL)");
      return 1;
    }
  }
  return 0;
}

このプログラムをtest.cとすると,

gcc -Wall test.c

として,できたa.outを動かしてみた結果は次のようになります.

$ ./a.out
The child made a system call execve
The child made a system call brk
The child made a system call brk
The child made a system call mmap
The child made a system call mmap
...
a.out  test2.c	test3.c  test.c
The child made a system call write
The child made a system call close
The child made a system call close
The child made a system call munmap
The child made a system call munmap
The child made a system call exit_group

こんな方法でcloneとforkとvforkを探して,見つかったらプロセスを止めればいいのかなと思っています.

今後の課題

  • 疑問:レジスタの使い方は何をみればいいんだろう.i386とx86_64,もしかしたらkernelのバージョンによっても違うんだろうか.
  • 問題:xsの使い方を完全に忘れてしまっているので,どうやってperlモジュールにすればいいのか悩むところ.
  • 疑問:PTRACE_CONTとPTRACE_SYSCALLの違いがわからない.ぐぐって探してるのが悪いのかしら.でもman 2 ptraceででてこないのは何が入ってないんだろう
  • 疑問:FreeBSD::i386::Ptraceのnofork.plをみると,%SYSにシステムコール一覧が入ってるみたいなんだけど,これどうやって作ってるんだろう
  • 問題:llevalの話の中では,'1 while 1'の実行を1秒で止めるとなっているんだけど,これはどうやって実現してるのかなぁ.OPcodeを解析するのかしらん.

たぶん続く