笔记主要参考 Playing with ptrace。原文比较久远,而且其中的程序是在32位机器上运行的,这里把它们修改成了能在64位机器上正确运行的版本。

ptrace() 系统调用提供了一种机制,使一个进程(tracer)可以追踪和控制另一个进程(tracee),主要用来实现断点调试以及系统调用追踪等功能。函数原型如下:

1
2
#include <sys/ptrace.h>
/* request selects the operation (e.g. PTRACE_TRACEME, PTRACE_PEEKUSER); pid is the
   traced process; the meaning of addr and data depends on request. */
long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);

https://man7.org/linux/man-pages/man2/ptrace.2.html

要更容易地看懂例程,需要先了解一下x86的寄存器。32bit和64bit的寄存器存在一些不同,如下图(来自参考3 page27)

General Purpose Registers in 64-Bit Mode

32bit下常用的eax, ebx在64bit下对应为 rax, rbx,但是使用eax依然可以访问rax的低32位。

在32位情况下,系统调用号被放到eax,传给系统调用的参数被依次放到 ebx, ecx, edx, esi, edi。在64位系统下,情况有些不同,我们使用 godbolt (Compiler Explorer) 将下面的C程序翻译成汇编代码,看一下寄存器的使用情况:

1
2
3
4
5
6
#include<unistd.h>
int main() {

    write(1, "hello!", 6);
    return 0;
}

翻译出来的汇编代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
.LC0:
        .string "hello!"
main:
        push    rbp
        mov     rbp, rsp
        mov     edx, 6                  # third argument of write(): byte count
        mov     esi, OFFSET FLAT:.LC0   # second argument of write(): address of "hello!"
        mov     edi, 1                  # first argument of write(): file descriptor 1
        call    write
        mov     eax, 0                  # return value of main (return 0)
        pop     rbp
        ret

可见64位上,write()的三个参数依次被放到了 edi, esi, edx 中(这里只用到了寄存器的低32位,所以还是e开头)。需要注意,这里调用的是libc提供的write()包装函数,遵循的是函数调用约定;真正进入内核时,前三个参数同样位于 rdi, rsi, rdx,而系统调用号放在rax中。各架构在系统调用时用到的寄存器如下:

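| arch | syscall number | return | arg0 | arg1 | arg2 | arg3 | arg4 | arg5 |
|------|----------------|--------|------|------|------|------|------|------|
| arm | r7 | r0 | r0 | r1 | r2 | r3 | r4 | r5 |
| arm64 | x8 | x0 | x0 | x1 | x2 | x3 | x4 | x5 |
| x86 | eax | eax | ebx | ecx | edx | esi | edi | ebp |
| x86_64 | rax | rax | rdi | rsi | rdx | r10 | r8 | r9 |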

完整的寄存器常用法表见: https://web.stanford.edu/class/cs107/guide/x86-64.html

知道了这几个寄存器的一般用法,就可以开始看例子了:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/reg.h>


/*
 * Fork a child that asks to be traced and then execs /bin/ls. The parent
 * waits for the child to stop, prints the system call number saved in the
 * child's ORIG_RAX slot, and lets the child continue.
 */
int main()
{
    pid_t tracee = fork();

    if (tracee != 0) {
        long syscall_nr;

        /* Block until the child stops. */
        wait(NULL);
        /* ORIG_RAX is a slot index in the tracee's USER area; every slot is 8 bytes wide. */
        syscall_nr = ptrace(PTRACE_PEEKUSER, tracee, 8 * ORIG_RAX, NULL);
        printf("The child made a system call %ld\n", syscall_nr);
        /* Resume the child. */
        ptrace(PTRACE_CONT, tracee, NULL, NULL);
        return 0;
    }

    /* Child: request tracing, then replace the process image with ls. */
    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
    execl("/bin/ls", "ls", NULL);
    return 0;
}

程序的主进程会追踪子进程的系统调用,将其系统调用号打印出来,然后调用ptrace(PTRACE_CONT,...) 让子进程继续运行。当系统调用发生时,内核会保存rax的原始内容到内存中,里面的内容就是系统调用号,可以从子进程的USER段读取出来,其偏移地址就是我们传入的 8 * ORIG_RAX。ORIG_RAX定义在sys/reg.h文件中,其定义为 #define ORIG_RAX 15。因为64bit系统里,USER中的每个数据为8个字节,而orig_rax在其中的下标是15(从0开始计数),所以偏移是 8 * 15 字节。USER的数据结构体定义在 /usr/include/x86_64-linux-gnu/sys/user.h 的 struct user_regs_struct 中,可以看一下。

运行输出

1
2
The child made a system call 59
...

查阅系统调用号表 https://github.com/torvalds/linux/blob/master/arch/x86/entry/syscalls/syscall_64.tbl , 系统调用为execve

看第二个例程:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/reg.h>
#include <sys/syscall.h>
#include <sys/user.h>
#include <stdio.h>

/*
 * Run /bin/ls under ptrace and log each write() system call: its three
 * arguments at syscall entry and its return value at syscall exit.
 *
 * Returns 0 once the child has terminated, 1 if fork() failed.
 */
int main()
{
    pid_t child;
    long orig_rax, rax;
    int status;
    int insyscall = 0;  /* 0: the next write() stop is an entry; 1: it is an exit */
    struct user_regs_struct regs;

    child = fork();
    if (child < 0)
        return 1;
    if (child == 0) {
        ptrace(PTRACE_TRACEME, 0, NULL, NULL);
        execl("/bin/ls", "ls", (char *)NULL);
        /* Reached only if execl() failed; leave without running the tracer's code. */
        _exit(127);
    }

    while (1) {
        /* Leave the loop when the child is gone, however it ended, or when
           wait() itself fails. Checking WIFEXITED alone would make this
           loop spin forever on a child that was killed by a signal. */
        if (wait(&status) < 0)
            break;
        if (WIFEXITED(status) || WIFSIGNALED(status))
            break;
        orig_rax = ptrace(PTRACE_PEEKUSER, child, 8 * ORIG_RAX, NULL);
        if (orig_rax == SYS_write) {
            if (insyscall == 0) {
                /* Syscall entry */
                insyscall = 1;
                ptrace(PTRACE_GETREGS, child, NULL, &regs);
                /* The register fields are unsigned long long in glibc's
                   struct user_regs_struct; convert them to match %ld. */
                printf("Write called with %ld, %ld, %ld\n",
                       (long)regs.rdi, (long)regs.rsi, (long)regs.rdx);
            } else {
                /* Syscall exit: RAX now holds the return value. */
                rax = ptrace(PTRACE_PEEKUSER, child, 8 * RAX, NULL);
                printf("Write returned with %ld\n", rax);
                insyscall = 0;
            }
        }
        /* Resume the child until its next syscall entry or exit. */
        ptrace(PTRACE_SYSCALL, child, NULL, NULL);
    }
    return 0;
}

和第一个例子区别不大,这段程序使用 ptrace(PTRACE_GETREGS, ...)函数获取系统调用时所有寄存器的值, 并打印出rdi, rsi, rdx中的值也就是write()的三个参数值, 在write()返回的时候打印出rax中的返回值。

输出类似于:

1
2
3
4
5
6
Write called with 1, 9348640, 99
a.out	      foo.c	      libbar.so		libnice.a     mod1.cpp	nice.cpp	 rtsched	  test_reg.s  wrapjack
Write returned with 99
Write called with 1, 9348640, 103
bar.c	      foo.map	      libdemo.a		libtom.so     mod1.o	nice.o		 rtsched.cpp	  tlpi-dist   wrapjack2
Write returned with 103

第三个例子,修改系统调用的参数:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <stdlib.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/reg.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

/* Number of bytes in a long, i.e. in one word moved by a single ptrace peek/poke request. */
const int long_size = sizeof(long);
/*
 * Reverse str in place, leaving its last character where it is (the
 * caller passes lines that end in '\n', which must stay at the end).
 *
 * Strings shorter than three characters have nothing to swap and are
 * returned unchanged.
 */
void reverse(char *str)
{
    size_t len = strlen(str);
    size_t lo, hi;
    char temp;

    /* Guard first: len - 2 would wrap around for len < 2. */
    if (len < 3)
        return;
    for (lo = 0, hi = len - 2; lo < hi; ++lo, --hi) {
        temp = str[lo];
        str[lo] = str[hi];
        str[hi] = temp;
    }
}

/*
 * Copy len bytes starting at address addr in the traced process child
 * into str, one word at a time via PTRACE_PEEKDATA, then NUL-terminate.
 *
 * str must have room for len + 1 bytes.
 */
void getdata(pid_t child, long addr, char *str, int len)
{
    /* A true constant, so the union below needs no variable-length array
       and the address stride always matches the number of bytes copied. */
    enum { WORD = sizeof(long) };
    union {
        long val;
        char chars[WORD];
    } data;
    int words = len / WORD;
    int rest = len % WORD;
    char *dst = str;
    int i;

    for (i = 0; i < words; ++i) {
        data.val = ptrace(PTRACE_PEEKDATA, child, addr + i * WORD, NULL);
        memcpy(dst, data.chars, WORD);
        dst += WORD;
    }
    if (rest != 0) {
        /* Fetch one more word but keep only the bytes inside the range. */
        data.val = ptrace(PTRACE_PEEKDATA, child, addr + i * WORD, NULL);
        memcpy(dst, data.chars, rest);
    }
    str[len] = '\0';
}

/*
 * Write the first len bytes of str to address addr in the traced process
 * child, one word at a time via PTRACE_POKEDATA.
 *
 * Bytes of the tracee beyond addr + len are left unchanged.
 */
void putdata(pid_t child, long addr, char *str, int len)
{
    enum { WORD = sizeof(long) };
    union {
        long val;
        char chars[WORD];
    } data;
    int words = len / WORD;
    int rest = len % WORD;
    char *src = str;
    int i;

    for (i = 0; i < words; ++i) {
        memcpy(data.chars, src, WORD);
        ptrace(PTRACE_POKEDATA, child, addr + i * WORD, data.val);
        src += WORD;
    }
    if (rest != 0) {
        /* PTRACE_POKEDATA always stores a whole word. Read the current word
           first so that the bytes past len keep their value instead of
           being overwritten with leftover contents of data. */
        data.val = ptrace(PTRACE_PEEKDATA, child, addr + i * WORD, NULL);
        memcpy(data.chars, src, rest);
        ptrace(PTRACE_POKEDATA, child, addr + i * WORD, data.val);
    }
}

/*
 * Run /bin/ls under ptrace and, at the entry of every write() system
 * call, reverse the buffer the child is about to write.
 *
 * Returns 0 once the child has terminated, 1 if fork() failed.
 */
int main()
{
    pid_t child;

    child = fork();
    if (child < 0)
        return 1;
    if (child == 0) {
        ptrace(PTRACE_TRACEME, 0, NULL, NULL);
        execl("/bin/ls", "ls", (char *)NULL);
        /* Reached only if execl() failed. */
        _exit(127);
    }
    else {
        long orig_rax;
        long buf_addr;   /* second write() argument: buffer address in the child */
        long count;      /* third write() argument: number of bytes */
        int status;
        char *str;
        int toggle = 0;  /* 0: the next write() stop is an entry; 1: it is an exit */

        while (1) {
            /* Stop when the child is gone, however it ended, or when wait()
               fails; otherwise a signal-killed child would loop forever. */
            if (wait(&status) < 0)
                break;
            if (WIFEXITED(status) || WIFSIGNALED(status))
                break;
            orig_rax = ptrace(PTRACE_PEEKUSER, child, 8 * ORIG_RAX, NULL);
            if (orig_rax == SYS_write) {
                if (toggle == 0) {
                    toggle = 1;
                    buf_addr = ptrace(PTRACE_PEEKUSER, child, 8 * RSI, NULL);
                    count = ptrace(PTRACE_PEEKUSER, child, 8 * RDX, NULL);
                    if (count > 0) {
                        str = calloc(count + 1, sizeof(char));
                        if (str != NULL) {
                            getdata(child, buf_addr, str, count);
                            reverse(str);
                            putdata(child, buf_addr, str, count);
                            /* One buffer per write(); release it so the
                               tracer does not leak on every call. */
                            free(str);
                        }
                    }
                }
                else {
                    toggle = 0;
                }
            }
            /* Resume the child until its next syscall entry or exit. */
            ptrace(PTRACE_SYSCALL, child, NULL, NULL);
        }
    }
    return 0;
}

输出类似于下面这样

1
2
3
4
5
6
7
$ ls

a.out   fun.c 

$ ./a.out

c.nuf   tuo.a

程序使用 PTRACE_POKEDATA 修改传给write()的参数,ssize_t write(int fd, const void *buf, size_t count) 三个参数分别为要写入的文件描述符,buf指针, 写入的字节数。 getdata()的作用是调用ptrace(PTRACE_PEEKDATA,..)以8个字节为单位取得参数buf指向的数据后,写入str指向的地址。之后反转字符串再调用putdata()使用ptrace(PTRACE_POKEDATA,..)写回去,就实现了上面的效果。

前几个例子,子进程开始时都调用了 ptrace(PTRACE_TRACEME, 0, NULL, NULL)来告诉内核对其追踪,但不是每个程序都会去调用这个的,而且我们经常随便拿来一个程序就用strace命令跟踪系统调用,那些程序里面也不会都调用了PTRACE_TRACEME吧。 要trace一个既有的进程也是可以的,只要使用ptrace(PTRACE_ATTACH, ..)就可以了。接下来我们就用PTRACE_ATTACH来跟踪一个进程。

首先写一个简单的程序供我们跟踪, 这个小程序会每两秒计数并输出。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
// counter

#include<stdio.h>

/* Print an increasing counter, pausing two seconds after each line, 100 times in total. */
int main() {
    int tick = 0;

    while (tick < 100) {
        printf("Counter: %d\n", tick);
        sleep(2);
        ++tick;
    }
    return 0;
}

然后使用PTRACE_ATTACH写我们的追踪程序, 结合一下之前的例子,将被追踪程序的输出也截取出来。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// trace_counter

#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/reg.h>
#include <sys/user.h>
#include <sys/syscall.h>
#include <string.h>

/* Number of bytes in a long, i.e. in one word moved by a single ptrace peek request. */
int long_size = sizeof(long);

/*
 * Copy len bytes starting at address addr in the traced process pid
 * into str, one word at a time via PTRACE_PEEKDATA, then NUL-terminate.
 *
 * str must have room for len + 1 bytes.
 */
void getdata(pid_t pid, long addr, char *str, int len)
{
    /* A true constant, so the union below needs no variable-length array
       and the address stride always matches the number of bytes copied. */
    enum { WORD = sizeof(long) };
    union {
        long val;
        char chars[WORD];
    } data;
    int words = len / WORD;
    int rest = len % WORD;
    char *dst = str;
    int i;

    for (i = 0; i < words; ++i) {
        data.val = ptrace(PTRACE_PEEKDATA, pid, addr + i * WORD, NULL);
        memcpy(dst, data.chars, WORD);
        dst += WORD;
    }
    if (rest != 0) {
        /* Fetch one more word but keep only the bytes inside the range. */
        data.val = ptrace(PTRACE_PEEKDATA, pid, addr + i * WORD, NULL);
        memcpy(dst, data.chars, rest);
    }
    str[len] = '\0';
}


/*
 * Attach to the process whose pid is given as the only argument and log
 * each of its write() system calls: arguments and buffer contents at
 * entry, return value at exit. Runs until the traced process terminates.
 *
 * Exits with status 1 on a usage error or when attaching fails.
 */
int main(int argc, char *argv[])
{
    long orig_rax, rax;
    long pid_arg;
    char *end;
    int status;
    int insyscall = 0;  /* 0: the next write() stop is an entry; 1: it is an exit */
    pid_t traced_process;
    struct user_regs_struct regs;

    if(argc != 2) {
        printf("Usage: %s <pid to be traced>\n", argv[0]);
        exit(1);
    }
    /* strtol() instead of atoi() so that a non-numeric argument is rejected. */
    pid_arg = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || pid_arg <= 0 ||
        pid_arg != (long)(pid_t)pid_arg) {
        printf("Usage: %s <pid to be traced>\n", argv[0]);
        exit(1);
    }
    traced_process = (pid_t)pid_arg;
    if (ptrace(PTRACE_ATTACH, traced_process, NULL, NULL) < 0) {
        perror("ptrace(PTRACE_ATTACH)");
        exit(1);
    }

    while(1) {
        /* Stop when the tracee is gone, however it ended, or when wait()
           fails; otherwise a signal-killed tracee would loop forever. */
        if (wait(&status) < 0)
            break;
        if (WIFEXITED(status) || WIFSIGNALED(status))
            break;
        orig_rax = ptrace(PTRACE_PEEKUSER, traced_process, 8 * ORIG_RAX, NULL);
        if(orig_rax == SYS_write) {
            if(insyscall == 0) {
                char *str;

                /* Syscall entry */
                insyscall = 1;
                ptrace(PTRACE_GETREGS, traced_process, NULL, &regs);
                /* The register fields are unsigned long long; convert to match %lld. */
                printf("Write called with %lld, %lld, %lld\n",
                       (long long)regs.rdi, (long long)regs.rsi,
                       (long long)regs.rdx);
                str = calloc(regs.rdx + 1, sizeof(char));
                if (str != NULL) {
                    getdata(traced_process, regs.rsi, str, regs.rdx);
                    printf("write string is : %s", str);
                    /* One buffer per write(); release it so the tracer
                       does not leak on every call. */
                    free(str);
                }
            }
            else { /* Syscall exit: RAX now holds the return value. */
                rax = ptrace(PTRACE_PEEKUSER, traced_process, 8 * RAX, NULL);
                printf("Write returned with %ld\n", rax);
                insyscall = 0;
            }
        }
        /* Resume the tracee until its next syscall entry or exit. */
        ptrace(PTRACE_SYSCALL, traced_process, NULL, NULL);
    }

    /* The loop ends only once the tracee has terminated (or can no longer
       be waited for), so there is nothing left to detach from. */
    return 0;
}

编译两个程序,然后在两个终端运行:

counter 的输出:

1
2
3
4
5
6
7
Counter: 0
Counter: 1
Counter: 2
Counter: 3
Counter: 4
Counter: 5
...

trace_counter 的输出:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
Write called with 1, 28184592, 11
write string is : Counter: 7
Write returned with 11
Write called with 1, 28184592, 11
write string is : Counter: 8
Write returned with 11
Write called with 1, 28184592, 11
write string is : Counter: 9
Write returned with 11
Write called with 1, 28184592, 12
write string is : Counter: 10
Write returned with 12

...

设置断点:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/reg.h>
#include <sys/user.h>
#include <sys/syscall.h>
#include <string.h>


/* Number of bytes in a long, i.e. in one word moved by a single ptrace peek/poke request. */
const int long_size = sizeof(long);

/*
 * Copy len bytes starting at address addr in the traced process pid
 * into str, one word at a time via PTRACE_PEEKDATA, then NUL-terminate.
 *
 * str must have room for len + 1 bytes: the terminator is stored at
 * str[len].
 */
void getdata(pid_t pid, long addr, char *str, int len)
{
    /* A true constant, so the union below needs no variable-length array. */
    enum { WORD = sizeof(long) };
    union {
        long val;
        char chars[WORD];
    } data;
    int words = len / WORD;
    int rest = len % WORD;
    char *dst = str;
    int i;

    for (i = 0; i < words; ++i) {
        data.val = ptrace(PTRACE_PEEKDATA, pid, addr + i * WORD, NULL);
        memcpy(dst, data.chars, WORD);
        dst += WORD;
    }
    if (rest != 0) {
        /* Fetch one more word but keep only the bytes inside the range. */
        data.val = ptrace(PTRACE_PEEKDATA, pid, addr + i * WORD, NULL);
        memcpy(dst, data.chars, rest);
    }
    str[len] = '\0';
}

/*
 * Write the first len bytes of str to address addr in the traced process
 * child, one word at a time via PTRACE_POKEDATA.
 *
 * Bytes of the tracee beyond addr + len are left unchanged.
 */
void putdata(pid_t child, long addr, char *str, int len)
{
    enum { WORD = sizeof(long) };
    union {
        long val;
        char chars[WORD];
    } data;
    int words = len / WORD;
    int rest = len % WORD;
    char *src = str;
    int i;

    for (i = 0; i < words; ++i) {
        memcpy(data.chars, src, WORD);
        ptrace(PTRACE_POKEDATA, child, addr + i * WORD, data.val);
        src += WORD;
    }
    if (rest != 0) {
        /* PTRACE_POKEDATA always stores a whole word. Read the current word
           first so that the bytes past len keep their value instead of
           being overwritten with leftover contents of data. */
        data.val = ptrace(PTRACE_PEEKDATA, child, addr + i * WORD, NULL);
        memcpy(data.chars, src, rest);
        ptrace(PTRACE_POKEDATA, child, addr + i * WORD, data.val);
    }
}

/*
 * Attach to the process whose pid is given as the only argument, replace
 * the code at its current instruction pointer with "int 0x80; int3", let
 * it run into the trap, then put the original bytes and registers back
 * and detach.
 *
 * Exits with status 1 on a usage error or when attaching fails.
 */
int main(int argc, char *argv[])
{
    enum { WORD = sizeof(long) };
    pid_t traced_process;
    struct user_regs_struct regs;
    long pid_arg;
    char *end;
    /* int 0x80, int3 (cast because the values do not fit a signed char) */
    char code[] = {(char)0xcd, (char)0x80, (char)0xcc, 0};
    /* getdata() stores a terminating NUL at index len, so the buffer needs
       one byte more than the word it backs up. */
    char backup[WORD + 1];

    if(argc != 2) {
        printf("Usage: %s <pid to be traced>\n", argv[0]);
        exit(1);
    }
    /* strtol() instead of atoi() so that a non-numeric argument is rejected. */
    pid_arg = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || pid_arg <= 0 ||
        pid_arg != (long)(pid_t)pid_arg) {
        printf("Usage: %s <pid to be traced>\n", argv[0]);
        exit(1);
    }
    traced_process = (pid_t)pid_arg;
    if (ptrace(PTRACE_ATTACH, traced_process, NULL, NULL) < 0) {
        perror("ptrace(PTRACE_ATTACH)");
        exit(1);
    }
    /* Wait for the stop caused by attaching. */
    wait(NULL);
    ptrace(PTRACE_GETREGS, traced_process, NULL, &regs);
    /* Copy instructions into a backup variable */
    getdata(traced_process, regs.rip, backup, WORD);
    /* Put the breakpoint */
    putdata(traced_process, regs.rip, code, 3);
    /* Let the process continue and execute
       the int 3 instruction */
    ptrace(PTRACE_CONT, traced_process, NULL, NULL);
    wait(NULL);
    printf("The process stopped, putting back "
           "the original instructions\n");
    printf("Press <enter> to continue\n");
    getchar();
    putdata(traced_process, regs.rip, backup, WORD);
    /* Setting rip (and the other registers) back to the values saved
       before the breakpoint so the process resumes at the original
       instruction */
    ptrace(PTRACE_SETREGS, traced_process, NULL, &regs);
    ptrace(PTRACE_DETACH, traced_process, NULL, NULL);
    return 0;
}

设置断点的主要原理是读取被调试进程的rip(instruction pointer)寄存器,将其指向的指令保存下来,然后用int3(0xCC)指令替换掉原指令。当CPU执行到int3时,内核会向被调试进程发送SIGTRAP信号使其停止,调试进程则通过wait()得知这一事件。之后再将原指令恢复,并将rip指回原处,程序就可以继续执行下去了。

参考:
0. https://www.linuxjournal.com/article/6100

  1. 系统调用号表 https://github.com/torvalds/linux/blob/master/arch/x86/entry/syscalls/syscall_64.tbl
  2. 寄存器列表 https://web.stanford.edu/class/cs107/guide/x86-64.html
  3. https://www.amd.com/system/files/TechDocs/24592.pdf
  4. https://theantway.com/2013/01/notes-for-playing-with-ptrace-on-64-bits-ubuntu-12-10/
  5. https://abda.nl/posts/understanding-ptrace/