main

/* Main entry point.

   Prints the banner (or switches to quiet mode), seeds the PRNG, builds the
   argument list for the real assembler via edit_params(), validates
   AFL_INST_RATIO, refuses to run if AS_LOOP_ENV_VAR shows that we are being
   re-entered, writes the instrumented assembly via add_instrumentation(),
   and finally runs the real assembler in a child process.

   The process exit code is the assembler's exit code when it exited
   normally, and 1 when it terminated abnormally (e.g. killed by a signal). */

int main(int argc, char** argv) {

  s32 pid;
  u32 rand_seed;
  int status;
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;

  clang_mode = !!getenv(CLANG_ENV_VAR);

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
 
  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"

         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  /* The timezone argument is obsolete and its use with a non-NULL pointer is
     unspecified by POSIX; we only need the timestamp for seeding. */

  gettimeofday(&tv, NULL);

  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

  srandom(rand_seed);

  edit_params(argc, argv);

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) 
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  /* The marker variable is inherited by the child we exec below. Seeing it
     already set means the "assembler" we launched was afl-as itself. */

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
    sanitizer = 1;
    inst_ratio /= 3;
  }

  if (!just_version) add_instrumentation();

  /* exec replaces the process image, so the assembler runs in a child; the
     parent stays around to clean up the temporary file afterwards. */

  if (!(pid = fork())) {

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  /* WEXITSTATUS() is only meaningful after a normal exit. If the assembler
     was terminated abnormally, report failure instead of whatever the
     exit-status bits happen to contain (typically 0, i.e. "success"). */

  if (!WIFEXITED(status)) exit(1);

  exit(WEXITSTATUS(status));

}

首先从环境变量 AFL_INST_RATIO 中初始化 inst_ratio_str
判断是否为 quiet 模式，并设置 be_quiet
随后根据时间和 pid 进行随机种子的设置
然后进入 edit_params 根据 argc 和 argv 修改参数
检查 inst_ratio_str 是否存在，如果存在则检查是否在 0 - 100 之间，不是则抛出错误
判断是否设置了环境变量 AS_LOOP_ENV_VAR ，如果设置了则抛出错误，否则设置为 1
如果使用 ASAN 或 MSAN 进行编译（设置了 AFL_USE_ASAN 或 AFL_USE_MSAN），则设置 sanitizer 并将 inst_ratio 除以 3
- 由于 afl 无法识别出 ASAN 的特定分支，因此会导致插入很多无意义的桩代码，所以直接暴力地将插桩概率缩小到原来的 1/3
如果不是只显示 version 的操作，则进入 add_instrumentation 函数，这也是实际的插桩函数
fork 出子进程并使用 execvp 执行处理完的 as_params
- 因为我们在 execvp 的时候会使用 as_params[0] 来完全替换掉当前进程空间中的程序，如果不 fork 则无法进行接下来的 unlink
最后判断如果没有 AFL_KEEP_ASSEMBLY ，则 unlink 掉 modified_file

可以看得出 afl-as 的主要逻辑在 edit_params 和 add_instrumentation 中

edit_params

/* Translate the command line afl-as received into the one handed to the
   real assembler (stored in as_params). The compiler driver always puts the
   input file last, so argv[argc - 1] is handled separately and every
   argument before it is copied across (with a few platform tweaks). */

static void edit_params(int argc, char** argv) {

  u8* scratch_dir = getenv("TMPDIR");
  u8* assembler   = getenv("AFL_AS");
  u32 idx;

#ifdef __APPLE__

  u8 clang_as_assembler = 0;

  /* The 'as' driver shipped with Xcode cctools lags behind and chokes on
     assembly emitted by newer, user-built clang releases (see
     http://goo.gl/HBWDtn).

     So, in clang mode and with no explicit AFL_AS, assemble with
     'clang -c' rather than 'as -q'. The two command lines are not really
     compatible, but a handful of small adjustments is enough for now.
     Idea courtesy of Nico Weber. */

  if (clang_mode && !assembler) {

    clang_as_assembler = 1;

    assembler = getenv("AFL_CC");
    if (!assembler) assembler = getenv("AFL_CXX");
    if (!assembler) assembler = "clang";

  }

#endif /* __APPLE__ */

  /* GCC quietly falls back to TEMP and then TMP when TMPDIR is unset. We
     mirror that so the pass_thru decision below looks at the right
     directory. */

  if (!scratch_dir) {

    scratch_dir = getenv("TEMP");

    if (!scratch_dir) {

      scratch_dir = getenv("TMP");
      if (!scratch_dir) scratch_dir = "/tmp";

    }

  }

  as_params = ck_alloc((argc + 32) * sizeof(u8*));

  if (assembler) as_params[0] = assembler;
  else as_params[0] = (u8*)"as";

  as_params[argc] = NULL;

  /* Copy every argument except argv[0] and the trailing file name. */

  for (idx = 1; idx < argc - 1; idx++) {

    char* arg = argv[idx];

    if (strcmp(arg, "--64") == 0) use_64bit = 1;
    else if (strcmp(arg, "--32") == 0) use_64bit = 0;

#ifdef __APPLE__

    /* Apple toolchains select the target with "-arch <name>" instead. */

    if (strcmp(arg, "-arch") == 0 && idx + 1 < argc) {

      char* arch = argv[idx + 1];

      if (strcmp(arch, "x86_64") == 0) use_64bit = 1;
      else if (strcmp(arch, "i386") == 0)
        FATAL("Sorry, 32-bit Apple platforms are not supported.");

    }

    /* Drop the switches that pick a specific upstream assembler in Xcode. */

    if (clang_mode && (strcmp(arg, "-q") == 0 || strcmp(arg, "-Q") == 0))
      continue;

#endif /* __APPLE__ */

    as_params[as_par_cnt++] = arg;

  }

#ifdef __APPLE__

  /* With clang standing in for the assembler, tell it to treat the input
     as assembly and stop after producing the object file. */

  if (clang_as_assembler) {

    as_params[as_par_cnt]     = "-c";
    as_params[as_par_cnt + 1] = "-x";
    as_params[as_par_cnt + 2] = "assembler";
    as_par_cnt += 3;

  }

#endif /* __APPLE__ */

  input_file = argv[argc - 1];

  if (input_file[0] == '-' && strcmp(input_file + 1, "-version") == 0) {

    /* "--version": nothing to instrument, forward the switch unchanged. */

    just_version = 1;
    modified_file = input_file;

  } else {

    if (input_file[0] == '-') {

      /* A lone "-" means no input file name (input_file becomes NULL); any
         other dash-prefixed final argument is a usage error. */

      if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
        else input_file = NULL;

    } else {

      /* Only files living in a temporary directory look like part of a
         regular compiler run; an ad-hoc .s file handed to gcc may be in a
         format we do not understand, so it is passed through untouched.
         This sidesteps a problem seen when building NSS. */

      int in_temp_dir =
        !strncmp(input_file, scratch_dir, strlen(scratch_dir)) ||
        !strncmp(input_file, "/var/tmp/", 9) ||
        !strncmp(input_file, "/tmp/", 5);

      if (!in_temp_dir) pass_thru = 1;

    }

    modified_file = alloc_printf("%s/.afl-%u-%u.s", scratch_dir, getpid(),
                                 (u32)time(NULL));

  }

  /* The (possibly rewritten) file name goes last, then the terminator. */

  as_params[as_par_cnt++] = modified_file;
  as_params[as_par_cnt]   = NULL;

}

根据最开始的代码注释可以知道，gcc 传递的最后一个参数始终是文件名，利用这个特性可以使得该代码变得更简单。

通过环境变量 AFL_AS 得到实际要调用的汇编器的位置（未设置时默认使用 as）
- 在 macos 下，当使用 clang 并没有环境变量 AFL_AS 的时候，会使用 clang -c 来代替 as -q 去编译汇编文件
依次从环境变量 TMPDIR、TEMP 和 TMP 中获取临时目录的地址，如果都没有则设为 /tmp
为 as_params 申请大小为 (argc + 32) * sizeof(u8*) 的空间
将 as_params[0] 设为 afl_as 或默认值 “as”
通过 argc 遍历 argv ，主要有以下几个关注点：
- 如果是 macos ，检查 -arch 后跟的参数：是 x86_64 则设置为 64 位，是 i386 则抛出错误（不支持 32 位 Apple 平台），其他值则忽略
- 在所有平台上都会检查是否有 --64 或 --32 来设置架构
- 如果是 macos 且处于 clang 模式，则跳过 -q 和 -Q
如果是 macos 且使用 clang 代替 as（use_clang_as），则添加 -c -x assembler 选项
检查 input_file（最后一个参数），如果以 - 开头
- 是 --version 的话设置 just_version 并直接跳到最后返回，在之后做 version 的查询
- 如果只有单独的 - ，则将 input_file 置为 NULL（之后从 stdin 读取）；其他以 - 开头的情况则抛出异常
如果 input_file 不以 - 开头，则检查它是否以 tmp_dir 、/var/tmp/ 或 /tmp/ 为前缀，如果都不是则设置 pass_thru 为 1
将 modified_file 设为 tmp_dir + “/.afl-” + getpid() + “-” + (u32)time(NULL) + “.s” 格式
最后将 as_params 填充 modified_file 并返回

总的来说和 afl-gcc 中的 edit_params 所做的事情差不多

add_instrumentation

/* Process input file, generate modified_file. Insert instrumentation in all
   the appropriate places.

   Reads input_file (or stdin when input_file is NULL) line by line and
   copies every line to modified_file, which is created exclusively with
   mode 0600. Unless pass_thru is set, a trampoline (trampoline_fmt_64 or
   trampoline_fmt_32, depending on use_64bit) is additionally written:

     - right after a conditional jump instruction, and
     - before the first instruction line that follows a function label or a
       branch-target label (deferred via instrument_next).

   If at least one trampoline was emitted, the main payload is appended at
   the end of the output. Failures to open either file are fatal. */

static void add_instrumentation(void) {

  static u8 line[MAX_LINE];

  FILE* inf;
  FILE* outf;
  s32 outfd;
  u32 ins_lines = 0;   /* Number of trampolines written so far. */

  /* Parser state:

       instr_ok        - set while inside a text section
       skip_csect      - set while a .code32 / .code64 directive disagrees
                         with use_64bit
       skip_next_label - set by the p2align check below; the next matching
                         branch label is then not instrumented
       skip_intel      - set between .intel_syntax and .att_syntax
       skip_app        - set between #APP and #NO_APP
       instrument_next - a trampoline is pending and will be written before
                         the next instruction line */

  u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
      skip_intel = 0, skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

  u8* colon_pos;

#endif /* __APPLE__ */

  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) PFATAL("Unable to read '%s'", input_file);

  } else inf = stdin;

  /* O_EXCL makes the open fail if the output file already exists. */

  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

  if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

  outf = fdopen(outfd, "w");

  if (!outf) PFATAL("fdopen() failed");  

  while (fgets(line, MAX_LINE, inf)) {

    /* In some cases, we want to defer writing the instrumentation trampoline
       until after all the labels, macros, comments, etc. If we're in this
       mode, and if the line starts with a tab followed by a character, dump
       the trampoline now. */

    if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
        instrument_next && line[0] == '\t' && isalpha(line[1])) {

      /* R() is defined elsewhere; presumably it yields a random value below
         its argument, used here as the location ID - confirm. */

      fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
              R(MAP_SIZE));

      instrument_next = 0;
      ins_lines++;

    }

    /* Output the actual line, call it a day in pass-thru mode. */

    fputs(line, outf);

    if (pass_thru) continue;

    /* All right, this is where the actual fun begins. For one, we only want to
       instrument the .text section. So, let's keep track of that in processed
       files - and let's set instr_ok accordingly. */

    if (line[0] == '\t' && line[1] == '.') {

      /* OpenBSD puts jump tables directly inline with the code, which is
         a bit annoying. They use a specific format of p2align directives
         around them, so we use that as a signal. */

      if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
          isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;

      if (!strncmp(line + 2, "text\n", 5) ||
          !strncmp(line + 2, "section\t.text", 13) ||
          !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
          !strncmp(line + 2, "section __TEXT,__text", 21)) {
        instr_ok = 1;
        continue; 
      }

      /* Any other section directive (the text ones were handled above), or
         .bss / .data, ends the instrumentable region. */

      if (!strncmp(line + 2, "section\t", 8) ||
          !strncmp(line + 2, "section ", 8) ||
          !strncmp(line + 2, "bss\n", 4) ||
          !strncmp(line + 2, "data\n", 5)) {
        instr_ok = 0;
        continue;
      }

    }

    /* Detect off-flavor assembly (rare, happens in gdb). When this is
       encountered, we set skip_csect until the opposite directive is
       seen, and we do not instrument. */

    if (strstr(line, ".code")) {

      if (strstr(line, ".code32")) skip_csect = use_64bit;
      if (strstr(line, ".code64")) skip_csect = !use_64bit;

    }

    /* Detect syntax changes, as could happen with hand-written assembly.
       Skip Intel blocks, resume instrumentation when back to AT&T. */

    if (strstr(line, ".intel_syntax")) skip_intel = 1;
    if (strstr(line, ".att_syntax")) skip_intel = 0;

    /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */

    if (line[0] == '#' || line[1] == '#') {

      if (strstr(line, "#APP")) skip_app = 1;
      if (strstr(line, "#NO_APP")) skip_app = 0;

    }

    /* If we're in the right mood for instrumenting, check for function
       names or conditional labels. This is a bit messy, but in essence,
       we want to catch:

         ^main:      - function entry point (always instrumented)
         ^.L0:       - GCC branch label
         ^.LBB0_0:   - clang branch label (but only in clang mode)
         ^\tjnz foo  - conditional branches

       ...but not:

         ^# BB#0:    - clang comments
         ^ # BB#0:   - ditto
         ^.Ltmp0:    - clang non-branch labels
         ^.LC0       - GCC non-branch labels
         ^.LBB0_0:   - ditto (when in GCC mode)
         ^\tjmp foo  - non-conditional jumps

       Additionally, clang and GCC on MacOS X follow a different convention
       with no leading dots on labels, hence the weird maze of #ifdefs
       later on.

     */

    if (skip_intel || skip_app || skip_csect || !instr_ok ||
        line[0] == '#' || line[0] == ' ') continue;

    /* Conditional branch instruction (jnz, etc). We append the instrumentation
       right after the branch (to instrument the not-taken path) and at the
       branch destination label (handled later on). */

    if (line[0] == '\t') {

      /* Any mnemonic starting with 'j' whose second letter is not 'm' (which
         excludes jmp). The R(100) < inst_ratio test is what applies
         AFL_INST_RATIO. */

      if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

        fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
                R(MAP_SIZE));

        ins_lines++;

      }

      continue;

    }

    /* Label of some sort. This may be a branch destination, but we need to
       tread carefully and account for several different formatting
       conventions. */

#ifdef __APPLE__

    /* Apple: L<whatever><digit>: */

    if ((colon_pos = strstr(line, ":"))) {

      if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

    /* Everybody else: .L<whatever>: */

    if (strstr(line, ":")) {

      if (line[0] == '.') {

#endif /* __APPLE__ */

        /* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__

        /* Apple: L<num> / LBB<num> */

        if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
            && R(100) < inst_ratio) {

#else

        /* Everybody else: .L<num> / .LBB<num> */

        if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
            && R(100) < inst_ratio) {

#endif /* __APPLE__ */

          /* An optimization is possible here by adding the code only if the
             label is mentioned in the code in contexts other than call / jmp.
             That said, this complicates the code by requiring two-pass
             processing (messy with stdin), and results in a speed gain
             typically under 10%, because compilers are generally pretty good
             about not generating spurious intra-function jumps.

             We use deferred output chiefly to avoid disrupting
             .Lfunc_begin0-style exception handling calculations (a problem on
             MacOS X). */

          if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;

        }

      } else {

        /* Function label (always instrumented, deferred mode). */

        instrument_next = 1;
    
      }

    }

  }

  /* The payload is only needed when some trampoline actually calls it. */

  if (ins_lines)
    fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

  /* stdin is not ours to close. */

  if (input_file) fclose(inf);
  fclose(outf);

  if (!be_quiet) {

    if (!ins_lines) WARNF("No instrumentation targets found%s.",
                          pass_thru ? " (pass-thru mode)" : "");
    else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
             ins_lines, use_64bit ? "64" : "32",
             getenv("AFL_HARDEN") ? "hardened" : 
             (sanitizer ? "ASAN/MSAN" : "non-hardened"),
             inst_ratio);
 
  }

}

主要的插桩代码。

打开 input_file 文件，获取输入流为 inf ，在没有权限的时候抛出异常。如果 input_file 为空则从 stdin 中读取
打开 modified_file ，获取输出流为 outf ，验证文件可否读写，并在没有权限的时候抛出异常
从 inf 中循环读取 MAX_LINE = 8192 字节并写入到 outf 中，并且进入真正的插桩逻辑中，这里的插桩只向 .text 段插入：
- 如果没有 pass_thru、skip_xxx 的标志信息，且满足其他的条件（最主要的是 instr_ok 和 instrument_next ）并且 line 的开头为 \t + 字母，则插入插桩的蹦床指令（trampoline_fmt）并清空 instrument_next
- 如果是 pass_thru 模式，则跳过
- OpenBSD 将跳转表直接内嵌到代码中，所以判别 p2align 指令并跳过
- 如果找到了 .text 段则设置 instr_ok 为 1 ，否则找到其他段设 instr_ok 为 0
- 通过几个判断来设置一些标志信息
  - off-flavor assembly : .code32 或 .code64，设置 skip_csect
  - AT&T : .intel_syntax 或 .att_syntax ，设置 skip_intel
  - ad-hoc asm blocks : #APP 或 #NO_APP，设置 skip_app
接下来是重要的部分，重点关注以下内容：
- ^main ：main 函数
- ^.L0 ：gcc 分支标记
- ^.LBB0_0 ：clang 分支标记
- ^\tjnz foo ：条件跳转分支标记
而不会去关注以下内容：
- ^# BB#0 ：clang 注释
- ^ # BB#0 ：同上
- ^.Ltmp0 ：clang 非分支标记
- ^.LC0 ：gcc 非分支标记
- ^.LBB0_0 ：同上（在 GCC 模式下）
- ^\tjmp foo ：非条件跳转分支标记
根据标志信息、instr_ok 和是否开头为 # 或空格来判断是否继续
对于 \tj[^m] 的条件跳转指令，使用 R(100) 获得随机数并与插桩密度 inst_ratio 比较，如果小于则根据架构将插桩的蹦床指令（trampoline_fmt）写入 outf 中，并将插桩计数 ins_lines 增加后进行下一次遍历
检查该行中是否存在 “:” ，如果是 macos 则会检查多一点。然后检查是否以 “.” 开头
- 如果是则检查是否为 .L / .LBB 的格式，也就是分支标记，使用 R(100) 获得随机数并与插桩密度 inst_ratio 比较，如果小于则设置 instrument_next 为 1
- 如果不是则代表是一个 function 标记，直接设置 instrument_next 为 1
- 设置了 instrument_next 便会在下一次循环的时候插入蹦床指令（trampoline_fmt）
一直循环直到读完 inf
如果插桩计数器 ins_lines 不为 0 ，就写入 main_payload 到 outf
关闭 inf 并打印一些信息

可以看得出来 afl-as 就是通过汇编的前导命令来判断是否为一个分支或函数，如果是则插入蹦床指令（trampoline_fmt）

另外插入蹦床指令（trampoline_fmt）的时候都会传入 R(MAP_SIZE)，用于随机化 id

trampoline_fmt

以 trampoline_fmt_64 为例来查看 trampoline_fmt ：

/* printf()-style template for the 64-bit trampoline that
   add_instrumentation() writes at each instrumented location. The single
   %08x conversion receives the location ID; register names carry a doubled
   percent sign because the string is used as a format string.

   The emitted code moves rsp down by 128+24 bytes (presumably to step over
   the 128-byte red zone - confirm), saves rdx, rcx and rax in the 24 bytes
   at the new top of stack, calls __afl_maybe_log with the ID in rcx, then
   restores the three registers and rsp. */
static const u8* trampoline_fmt_64 =

  "\n"
  "/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
  "\n"
  ".align 4\n"
  "\n"
  "leaq -(128+24)(%%rsp), %%rsp\n"
  "movq %%rdx,  0(%%rsp)\n"
  "movq %%rcx,  8(%%rsp)\n" /* save the original rcx; it is overwritten below */
  "movq %%rax, 16(%%rsp)\n"
  "movq $0x%08x, %%rcx\n" /* rcx = ID of this location (the format argument) */
  "call __afl_maybe_log\n" /* invoke the shared logging routine */
  "movq 16(%%rsp), %%rax\n"
  "movq  8(%%rsp), %%rcx\n"
  "movq  0(%%rsp), %%rdx\n"
  "leaq (128+24)(%%rsp), %%rsp\n"
  "\n"
  "/* --- END --- */\n"
  "\n";

其中这里 mov rcx, xxx 的 xxx 就是 R(MAP_SIZE)，以此来标示和区分每个分支点

对应 IDA 中为：

main_payload

以 main_payload_64 为例来查看 main_payload ：

/* The OpenBSD hack is due to lahf and sahf not being recognized by some
   versions of binutils: http://marc.info/?l=openbsd-cvs&m=141636589924400

   The Apple code is a bit different when calling libc functions because
   they are doing relocations differently from everybody else. We also need
   to work around the crash issue with .lcomm and the fact that they don't
   recognize .string. */

/* CALL_L64(str) expands to the text of one call instruction (newline
   included) targeting the function named by the string literal str:
   "call _<str>" on Apple platforms, "call <str>@PLT" everywhere else. */

#ifdef __APPLE__
#  define CALL_L64(str)		"call _" str "\n"
#else
#  define CALL_L64(str)		"call " str "@PLT\n"
#endif /* ^__APPLE__ */

/* 64-bit assembly text that add_instrumentation() appends to the output
   file when at least one trampoline was written. It defines
   __afl_maybe_log, which the trampolines call with the location ID in rcx,
   together with the variables that routine uses.

   Labels, in order of appearance:

     __afl_maybe_log      - saves flags (lahf / seto), then goes to
                            __afl_setup if __afl_area_ptr is still zero
     __afl_store          - bumps (or, with SKIP_COUNTS, sets) the map byte
                            indexed by rcx; unless COVERAGE_ONLY is defined,
                            rcx is first XORed with __afl_prev_loc, which is
                            then updated and shifted right by one
     __afl_return         - restores the saved flags and returns
     __afl_setup          - returns at once after an earlier failure; reuses
                            __afl_global_area_ptr if it is already set
     __afl_setup_first    - saves registers, then getenv() / atoi() / shmat()
     __afl_forkserver     - writes 4 bytes to FORKSRV_FD + 1
     __afl_fork_wait_loop - read from FORKSRV_FD, fork, report the child PID
                            and its wait status on FORKSRV_FD + 1, repeat
     __afl_fork_resume    - closes both descriptors, restores registers and
                            continues at __afl_store
     __afl_die            - calls _exit
     __afl_setup_abort    - increments __afl_setup_failure, restores
                            registers and returns via __afl_return */

static const u8* main_payload_64 = 

  "\n"
  "/* --- AFL MAIN PAYLOAD (64-BIT) --- */\n"
  "\n"
  ".text\n"
  ".att_syntax\n"
  ".code64\n"
  ".align 8\n"
  "\n"
  "__afl_maybe_log:\n"
  "\n"
#if defined(__OpenBSD__)  || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
  "  .byte 0x9f /* lahf */\n"
#else
  "  lahf\n"
#endif /* ^__OpenBSD__, etc */
  "  seto  %al\n"
  "\n"
  "  /* Check if SHM region is already mapped. */\n"
  "\n"
  "  movq  __afl_area_ptr(%rip), %rdx\n"
  "  testq %rdx, %rdx\n"
  "  je    __afl_setup\n"
  "\n"
  "__afl_store:\n"
  "\n"
  "  /* Calculate and store hit for the code location specified in rcx. */\n"
  "\n"
#ifndef COVERAGE_ONLY
  "  xorq __afl_prev_loc(%rip), %rcx\n"
  "  xorq %rcx, __afl_prev_loc(%rip)\n"
  "  shrq $1, __afl_prev_loc(%rip)\n"
#endif /* ^!COVERAGE_ONLY */
  "\n"
#ifdef SKIP_COUNTS
  "  orb  $1, (%rdx, %rcx, 1)\n"
#else
  "  incb (%rdx, %rcx, 1)\n"
#endif /* ^SKIP_COUNTS */
  "\n"
  "__afl_return:\n"
  "\n"
  "  addb $127, %al\n"
#if defined(__OpenBSD__)  || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
  "  .byte 0x9e /* sahf */\n"
#else
  "  sahf\n"
#endif /* ^__OpenBSD__, etc */
  "  ret\n"
  "\n"
  ".align 8\n"
  "\n"
  "__afl_setup:\n"
  "\n"
  "  /* Do not retry setup if we had previous failures. */\n"
  "\n"
  "  cmpb $0, __afl_setup_failure(%rip)\n"
  "  jne __afl_return\n"
  "\n"
  "  /* Check out if we have a global pointer on file. */\n"
  "\n"
#ifndef __APPLE__
  "  movq  __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
  "  movq  (%rdx), %rdx\n"
#else
  "  movq  __afl_global_area_ptr(%rip), %rdx\n"
#endif /* !^__APPLE__ */
  "  testq %rdx, %rdx\n"
  "  je    __afl_setup_first\n"
  "\n"
  "  movq %rdx, __afl_area_ptr(%rip)\n"
  "  jmp  __afl_store\n" 
  "\n"
  "__afl_setup_first:\n"
  "\n"
  "  /* Save everything that is not yet saved and that may be touched by\n"
  "     getenv() and several other libcalls we'll be relying on. */\n"
  "\n"
  "  leaq -352(%rsp), %rsp\n"
  "\n"
  "  movq %rax,   0(%rsp)\n"
  "  movq %rcx,   8(%rsp)\n"
  "  movq %rdi,  16(%rsp)\n"
  "  movq %rsi,  32(%rsp)\n"
  "  movq %r8,   40(%rsp)\n"
  "  movq %r9,   48(%rsp)\n"
  "  movq %r10,  56(%rsp)\n"
  "  movq %r11,  64(%rsp)\n"
  "\n"
  "  movq %xmm0,  96(%rsp)\n"
  "  movq %xmm1,  112(%rsp)\n"
  "  movq %xmm2,  128(%rsp)\n"
  "  movq %xmm3,  144(%rsp)\n"
  "  movq %xmm4,  160(%rsp)\n"
  "  movq %xmm5,  176(%rsp)\n"
  "  movq %xmm6,  192(%rsp)\n"
  "  movq %xmm7,  208(%rsp)\n"
  "  movq %xmm8,  224(%rsp)\n"
  "  movq %xmm9,  240(%rsp)\n"
  "  movq %xmm10, 256(%rsp)\n"
  "  movq %xmm11, 272(%rsp)\n"
  "  movq %xmm12, 288(%rsp)\n"
  "  movq %xmm13, 304(%rsp)\n"
  "  movq %xmm14, 320(%rsp)\n"
  "  movq %xmm15, 336(%rsp)\n"
  "\n"
  "  /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n"
  "\n"
  "  /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the\n"
  "     original stack ptr in the callee-saved r12. */\n"
  "\n"
  "  pushq %r12\n"
  "  movq  %rsp, %r12\n"
  "  subq  $16, %rsp\n"
  "  andq  $0xfffffffffffffff0, %rsp\n"
  "\n"
  "  leaq .AFL_SHM_ENV(%rip), %rdi\n"
  CALL_L64("getenv")
  "\n"
  "  testq %rax, %rax\n"
  "  je    __afl_setup_abort\n"
  "\n"
  "  movq  %rax, %rdi\n"
  CALL_L64("atoi")
  "\n"
  "  xorq %rdx, %rdx   /* shmat flags    */\n"
  "  xorq %rsi, %rsi   /* requested addr */\n"
  "  movq %rax, %rdi   /* SHM ID         */\n"
  CALL_L64("shmat")
  "\n"
  "  cmpq $-1, %rax\n"
  "  je   __afl_setup_abort\n"
  "\n"
  "  /* Store the address of the SHM region. */\n"
  "\n"
  "  movq %rax, %rdx\n"
  "  movq %rax, __afl_area_ptr(%rip)\n"
  "\n"
#ifdef __APPLE__
  "  movq %rax, __afl_global_area_ptr(%rip)\n"
#else
  "  movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
  "  movq %rax, (%rdx)\n"
#endif /* ^__APPLE__ */
  "  movq %rax, %rdx\n"
  "\n"
  "__afl_forkserver:\n"
  "\n"
  "  /* Enter the fork server mode to avoid the overhead of execve() calls. We\n"
  "     push rdx (area ptr) twice to keep stack alignment neat. */\n"
  "\n"
  "  pushq %rdx\n"
  "  pushq %rdx\n"
  "\n"
  "  /* Phone home and tell the parent that we're OK. (Note that signals with\n"
  "     no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
  "     closed because we were execve()d from an instrumented binary, or because\n"
  "     the parent doesn't want to use the fork server. */\n"
  "\n"
  "  movq $4, %rdx               /* length    */\n"
  "  leaq __afl_temp(%rip), %rsi /* data      */\n"
  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi       /* file desc */\n"
  CALL_L64("write")
  "\n"
  "  cmpq $4, %rax\n"
  "  jne  __afl_fork_resume\n"
  "\n"
  "__afl_fork_wait_loop:\n"
  "\n"
  "  /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
  "\n"
  "  movq $4, %rdx               /* length    */\n"
  "  leaq __afl_temp(%rip), %rsi /* data      */\n"
  "  movq $" STRINGIFY(FORKSRV_FD) ", %rdi             /* file desc */\n"
  CALL_L64("read")
  "  cmpq $4, %rax\n"
  "  jne  __afl_die\n"
  "\n"
  "  /* Once woken up, create a clone of our process. This is an excellent use\n"
  "     case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
  "     caches getpid() results and offers no way to update the value, breaking\n"
  "     abort(), raise(), and a bunch of other things :-( */\n"
  "\n"
  CALL_L64("fork")
  "  cmpq $0, %rax\n"
  "  jl   __afl_die\n"
  "  je   __afl_fork_resume\n"
  "\n"
  "  /* In parent process: write PID to pipe, then wait for child. */\n"
  "\n"
  "  movl %eax, __afl_fork_pid(%rip)\n"
  "\n"
  "  movq $4, %rdx                   /* length    */\n"
  "  leaq __afl_fork_pid(%rip), %rsi /* data      */\n"
  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi             /* file desc */\n"
  CALL_L64("write")
  "\n"
  "  movq $0, %rdx                   /* no flags  */\n"
  "  leaq __afl_temp(%rip), %rsi     /* status    */\n"
  "  movq __afl_fork_pid(%rip), %rdi /* PID       */\n"
  CALL_L64("waitpid")
  "  cmpq $0, %rax\n"
  "  jle  __afl_die\n"
  "\n"
  "  /* Relay wait status to pipe, then loop back. */\n"
  "\n"
  "  movq $4, %rdx               /* length    */\n"
  "  leaq __afl_temp(%rip), %rsi /* data      */\n"
  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi         /* file desc */\n"
  CALL_L64("write")
  "\n"
  "  jmp  __afl_fork_wait_loop\n"
  "\n"
  "__afl_fork_resume:\n"
  "\n"
  "  /* In child process: close fds, resume execution. */\n"
  "\n"
  "  movq $" STRINGIFY(FORKSRV_FD) ", %rdi\n"
  CALL_L64("close")
  "\n"
  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi\n"
  CALL_L64("close")
  "\n"
  "  popq %rdx\n"
  "  popq %rdx\n"
  "\n"
  "  movq %r12, %rsp\n"
  "  popq %r12\n"
  "\n"
  "  movq  0(%rsp), %rax\n"
  "  movq  8(%rsp), %rcx\n"
  "  movq 16(%rsp), %rdi\n"
  "  movq 32(%rsp), %rsi\n"
  "  movq 40(%rsp), %r8\n"
  "  movq 48(%rsp), %r9\n"
  "  movq 56(%rsp), %r10\n"
  "  movq 64(%rsp), %r11\n"
  "\n"
  "  movq  96(%rsp), %xmm0\n"
  "  movq 112(%rsp), %xmm1\n"
  "  movq 128(%rsp), %xmm2\n"
  "  movq 144(%rsp), %xmm3\n"
  "  movq 160(%rsp), %xmm4\n"
  "  movq 176(%rsp), %xmm5\n"
  "  movq 192(%rsp), %xmm6\n"
  "  movq 208(%rsp), %xmm7\n"
  "  movq 224(%rsp), %xmm8\n"
  "  movq 240(%rsp), %xmm9\n"
  "  movq 256(%rsp), %xmm10\n"
  "  movq 272(%rsp), %xmm11\n"
  "  movq 288(%rsp), %xmm12\n"
  "  movq 304(%rsp), %xmm13\n"
  "  movq 320(%rsp), %xmm14\n"
  "  movq 336(%rsp), %xmm15\n"
  "\n"
  "  leaq 352(%rsp), %rsp\n"
  "\n"
  "  jmp  __afl_store\n"
  "\n"
  "__afl_die:\n"
  "\n"
  "  xorq %rax, %rax\n"
  CALL_L64("_exit")
  "\n"
  "__afl_setup_abort:\n"
  "\n"
  "  /* Record setup failure so that we don't keep calling\n"
  "     shmget() / shmat() over and over again. */\n"
  "\n"
  "  incb __afl_setup_failure(%rip)\n"
  "\n"
  "  movq %r12, %rsp\n"
  "  popq %r12\n"
  "\n"
  "  movq  0(%rsp), %rax\n"
  "  movq  8(%rsp), %rcx\n"
  "  movq 16(%rsp), %rdi\n"
  "  movq 32(%rsp), %rsi\n"
  "  movq 40(%rsp), %r8\n"
  "  movq 48(%rsp), %r9\n"
  "  movq 56(%rsp), %r10\n"
  "  movq 64(%rsp), %r11\n"
  "\n"
  "  movq  96(%rsp), %xmm0\n"
  "  movq 112(%rsp), %xmm1\n"
  "  movq 128(%rsp), %xmm2\n"
  "  movq 144(%rsp), %xmm3\n"
  "  movq 160(%rsp), %xmm4\n"
  "  movq 176(%rsp), %xmm5\n"
  "  movq 192(%rsp), %xmm6\n"
  "  movq 208(%rsp), %xmm7\n"
  "  movq 224(%rsp), %xmm8\n"
  "  movq 240(%rsp), %xmm9\n"
  "  movq 256(%rsp), %xmm10\n"
  "  movq 272(%rsp), %xmm11\n"
  "  movq 288(%rsp), %xmm12\n"
  "  movq 304(%rsp), %xmm13\n"
  "  movq 320(%rsp), %xmm14\n"
  "  movq 336(%rsp), %xmm15\n"
  "\n"
  "  leaq 352(%rsp), %rsp\n"
  "\n"
  "  jmp __afl_return\n"
  "\n"
  ".AFL_VARS:\n"
  "\n"

#ifdef __APPLE__

  "  .comm   __afl_area_ptr, 8\n"
#ifndef COVERAGE_ONLY
  "  .comm   __afl_prev_loc, 8\n"
#endif /* !COVERAGE_ONLY */
  "  .comm   __afl_fork_pid, 4\n"
  "  .comm   __afl_temp, 4\n"
  "  .comm   __afl_setup_failure, 1\n"

#else

  "  .lcomm   __afl_area_ptr, 8\n"
#ifndef COVERAGE_ONLY
  "  .lcomm   __afl_prev_loc, 8\n"
#endif /* !COVERAGE_ONLY */
  "  .lcomm   __afl_fork_pid, 4\n"
  "  .lcomm   __afl_temp, 4\n"
  "  .lcomm   __afl_setup_failure, 1\n"

#endif /* ^__APPLE__ */

  "  .comm    __afl_global_area_ptr, 8, 8\n"
  "\n"
  ".AFL_SHM_ENV:\n"
  "  .asciz \"" SHM_ENV_VAR "\"\n"
  "\n"
  "/* --- END --- */\n"
  "\n";

#endif /* !_HAVE_AFL_AS_H */

比较长，大致流程如下图：

Untitled 1.png

分块进行说明

bss段的全局变量

.AFL_VARS:

  .lcomm   __afl_area_ptr, 8 // 共享内存地址
  .lcomm   __afl_prev_loc, 8 // 上一个插桩位置
  .lcomm   __afl_fork_pid, 4 // fork所产生的子进程pid
  .lcomm   __afl_temp, 4 // 临时缓冲区
  .lcomm   __afl_setup_failure, 1 // failure 标志位，置位后直接返回，不再重试初始化
  .comm    __afl_global_area_ptr, 8, 8 // 全局区域指针

.AFL_SHM_ENV:
  .asciz __AFL_SHM_ID

__afl_maybe_log

__afl_maybe_log:

  lahf
  seto %al

  /* Check if SHM region is already mapped. */
  movq  __afl_area_ptr(%rip), %rdx
  testq %rdx, %rdx
  je    __afl_setup

使用 lahf 指令将状态标志位加载到 AH ，由于 lahf 不包含溢出标志 OF ，接着使用 seto %al 把 OF 保存到 AL（OF 置位时 AL 为 1 ，否则为 0）
判断 __afl_area_ptr 是否初始化了
- 没有初始化则跳到 __afl_setup
- 初始化了则继续向下执行 __afl_store

__afl_setup

__afl_setup:
    /* Do not retry setup if we had previous failures. */
    cmpb $0, __afl_setup_failure(%rip)
    jne __afl_return
    /* Check out if we have a global pointer on file. */
    movq  __afl_global_area_ptr(%rip), %rdx
    testq %rdx, %rdx
    je    __afl_setup_first
    movq %rdx, __afl_area_ptr(%rip)
    jmp  __afl_store

如果 __afl_setup_failure 不为 0（之前初始化失败过），则跳到 __afl_return 返回
如果 __afl_global_area_ptr 不为空，则将 __afl_area_ptr 赋值为 __afl_global_area_ptr 并跳转到 __afl_store
否则跳转到 __afl_setup_first

__afl_setup_first

__afl_setup_first:
    /* Save everything that is not yet saved and that may be touched by
       getenv() and several other libcalls we'll be relying on. */

    leaq -352(%rsp), %rsp
    movq %rax,   0(%rsp)
    movq %rcx,   8(%rsp)
    movq %rdi,  16(%rsp)
    movq %rsi,  32(%rsp)
    movq %r8,   40(%rsp)
    movq %r9,   48(%rsp)
    movq %r10,  56(%rsp)
    movq %r11,  64(%rsp)

    movq %xmm0,  96(%rsp)
    movq %xmm1,  112(%rsp)
    movq %xmm2,  128(%rsp)
    movq %xmm3,  144(%rsp)
    movq %xmm4,  160(%rsp)
    movq %xmm5,  176(%rsp)
    movq %xmm6,  192(%rsp)
    movq %xmm7,  208(%rsp)
    movq %xmm8,  224(%rsp)
    movq %xmm9,  240(%rsp)
    movq %xmm10, 256(%rsp)
    movq %xmm11, 272(%rsp)
    movq %xmm12, 288(%rsp)
    movq %xmm13, 304(%rsp)
    movq %xmm14, 320(%rsp)
    movq %xmm15, 336(%rsp)

    /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */

    /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the
       original stack ptr in the callee-saved r12. */

    pushq %r12
    movq  %rsp, %r12
    subq  $16, %rsp
    andq  $0xfffffffffffffff0, %rsp

    leaq .AFL_SHM_ENV(%rip), %rdi
    CALL_L64("getenv")

    testq %rax, %rax
    je    __afl_setup_abort

    movq  %rax, %rdi
    CALL_L64("atoi")

    xorq %rdx, %rdx   /* shmat flags    */
    xorq %rsi, %rsi   /* requested addr */
    movq %rax, %rdi   /* SHM ID         */
    CALL_L64("shmat")

    cmpq $-1, %rax
    je   __afl_setup_abort

    /* Store the address of the SHM region. */

    movq %rax, %rdx
    movq %rax, __afl_area_ptr(%rip)

    movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx
    movq %rax, (%rdx)

    movq %rax, %rdx

保存所有寄存器到栈上，包括 xmm 寄存器
将 rsp 进行 16 字节对齐，原来的 rsp 保存在 r12 中
获取环境变量 __AFL_SHM_ID ，如果没有则跳到 __afl_setup_abort
用 atoi 将该环境变量的值转换为整数 shm_id ，然后调用 shmat(shm_id, 0, 0) ，该函数把共享内存区对象映射到调用进程的地址空间，如果失败则跳到 __afl_setup_abort
存储返回的共享内存地址到 __afl_area_ptr 和 __afl_global_area_ptr 中
接下来走到 __afl_forkserver

__afl_forkserver

__afl_forkserver:

    /* Enter the fork server mode to avoid the overhead of execve() calls. We
       push rdx (area ptr) twice to keep stack alignment neat. */

    pushq %rdx
    pushq %rdx

    /* Phone home and tell the parent that we're OK. (Note that signals with
       no SA_RESTART will mess it up). If this fails, assume that the fd is
       closed because we were execve()d from an instrumented binary, or because
       the parent doesn't want to use the fork server. */

    movq $4, %rdx               /* length    */
    leaq __afl_temp(%rip), %rsi /* data      */
    movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi       /* file desc */
    CALL_L64("write")

    cmpq $4, %rax
    jne  __afl_fork_resume

#define STRINGIFY_INTERNAL(x) #x
#define STRINGIFY(x) STRINGIFY_INTERNAL(x)

/* Designated file descriptors for forkserver commands (the application will
   use FORKSRV_FD and FORKSRV_FD + 1): */

#define FORKSRV_FD          198

向 STRINGIFY((FORKSRV_FD + 1)) 也就是 199 号描述符（即状态管道）中写入 __afl_temp 的四个字节，告诉 fork server 已经成功启动
如果失败跳到 __afl_fork_resume 中，否则接下来进入 __afl_fork_wait_loop

__afl_fork_wait_loop

__afl_fork_wait_loop:

    /* Wait for parent by reading from the pipe. Abort if read fails. */

    movq $4, %rdx               /* length    */
    leaq __afl_temp(%rip), %rsi /* data      */
    movq $" STRINGIFY(FORKSRV_FD) ", %rdi             /* file desc */
    CALL_L64("read")
    cmpq $4, %rax
    jne  __afl_die

    /* Once woken up, create a clone of our process. This is an excellent use
       case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly
       caches getpid() results and offers no way to update the value, breaking
       abort(), raise(), and a bunch of other things :-( */

    CALL_L64("fork")
    cmpq $0, %rax
    jl   __afl_die
    je   __afl_fork_resume

    /* In parent process: write PID to pipe, then wait for child. */

    movl %eax, __afl_fork_pid(%rip)

    movq $4, %rdx                   /* length    */
    leaq __afl_fork_pid(%rip), %rsi /* data      */
    movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi             /* file desc */
    CALL_L64("write")

    movq $0, %rdx                   /* no flags  */
    leaq __afl_temp(%rip), %rsi     /* status    */
    movq __afl_fork_pid(%rip), %rdi /* PID       */
    CALL_L64("waitpid")
    cmpq $0, %rax
    jle  __afl_die

    /* Relay wait status to pipe, then loop back. */

    movq $4, %rdx               /* length    */
    leaq __afl_temp(%rip), %rsi /* data      */
    movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi         /* file desc */
    CALL_L64("write")

    jmp  __afl_fork_wait_loop

从 STRINGIFY(FORKSRV_FD) 即 198 中读四字节的数据，如果读取失败则跳到 __afl_die
fork 一个子进程，失败则跳到 __afl_die ，如果是子进程则跳到 __afl_fork_resume
将子进程的 pid 赋给 __afl_fork_pid ，向 STRINGIFY((FORKSRV_FD + 1)) 即 199 写入四字节的 __afl_fork_pid
waitpid 等待子进程执行完成（失败则跳到 __afl_die），子进程的退出状态保存在 __afl_temp 中，再向 STRINGIFY((FORKSRV_FD + 1)) 即 199 写入四字节的 __afl_temp
重新执行下一轮的 __afl_fork_wait_loop

__afl_fork_resume

__afl_fork_resume:

    /* In child process: close fds, resume execution. */

    movq $" STRINGIFY(FORKSRV_FD) ", %rdi
    CALL_L64("close")

    movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi
    CALL_L64("close")

    popq %rdx
    popq %rdx

    movq %r12, %rsp
    popq %r12

    movq  0(%rsp), %rax
    movq  8(%rsp), %rcx
    movq 16(%rsp), %rdi
    movq 32(%rsp), %rsi
    movq 40(%rsp), %r8
    movq 48(%rsp), %r9
    movq 56(%rsp), %r10
    movq 64(%rsp), %r11

    movq  96(%rsp), %xmm0
    movq 112(%rsp), %xmm1
    movq 128(%rsp), %xmm2
    movq 144(%rsp), %xmm3
    movq 160(%rsp), %xmm4
    movq 176(%rsp), %xmm5
    movq 192(%rsp), %xmm6
    movq 208(%rsp), %xmm7
    movq 224(%rsp), %xmm8
    movq 240(%rsp), %xmm9
    movq 256(%rsp), %xmm10
    movq 272(%rsp), %xmm11
    movq 288(%rsp), %xmm12
    movq 304(%rsp), %xmm13
    movq 320(%rsp), %xmm14
    movq 336(%rsp), %xmm15

    leaq 352(%rsp), %rsp

    jmp  __afl_store

关闭子进程中的 198 和 199 的 fd
恢复子进程的寄存器状态
跳到 __afl_store

__afl_store

__afl_store:

    /* Calculate and store hit for the code location specified in rcx. */

    xorq __afl_prev_loc(%rip), %rcx
    xorq %rcx, __afl_prev_loc(%rip)
    shrq $1, __afl_prev_loc(%rip)

    incb (%rdx, %rcx, 1)

将上一个桩点 __afl_prev_loc 的值和当前桩点的值（R(MAP_SIZE)）进行异或，使共享内存中以该异或结果为下标的槽的值加一，然后将 __afl_prev_loc 设为 R(MAP_SIZE) >> 1
- 这里之所以要右移一位，是因为如果不右移，A→A 和 B→B 这样的两个跳转的异或结果都是 0 ，A→B 和 B→A 的异或结果也相同，这两种情况都无法区分
这里的 MAP_SIZE = 64K ，存在碰撞问题，但是概率可以接受

__afl_setup_abort

__afl_setup_abort:

    /* Record setup failure so that we don't keep calling
       shmget() / shmat() over and over again. */

    incb __afl_setup_failure(%rip)

    movq %r12, %rsp
    popq %r12

    movq  0(%rsp), %rax
    movq  8(%rsp), %rcx
    movq 16(%rsp), %rdi
    movq 32(%rsp), %rsi
    movq 40(%rsp), %r8
    movq 48(%rsp), %r9
    movq 56(%rsp), %r10
    movq 64(%rsp), %r11

    movq  96(%rsp), %xmm0
    movq 112(%rsp), %xmm1
    movq 128(%rsp), %xmm2
    movq 144(%rsp), %xmm3
    movq 160(%rsp), %xmm4
    movq 176(%rsp), %xmm5
    movq 192(%rsp), %xmm6
    movq 208(%rsp), %xmm7
    movq 224(%rsp), %xmm8
    movq 240(%rsp), %xmm9
    movq 256(%rsp), %xmm10
    movq 272(%rsp), %xmm11
    movq 288(%rsp), %xmm12
    movq 304(%rsp), %xmm13
    movq 320(%rsp), %xmm14
    movq 336(%rsp), %xmm15

    leaq 352(%rsp), %rsp

    jmp __afl_return

递增 __afl_setup_failure ，并恢复寄存器的值
跳到 __afl_return

__afl_return

__afl_return:

    addb $127, %al

    sahf

    ret
    .align 8

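addb $127, %al 用于恢复溢出标志 OF（AL 中是 seto 保存的 0 或 1 ，加上 127 后只有 AL 为 1 时才会产生有符号溢出并置位 OF），sahf 再从 AH 恢复其余的状态标志位，最后 ret 返回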

总结

整体而言过程总结如下：

初始化一些参数，包括共享内存，同时保存寄存器参数
- 只有第一次调用 __afl_maybe_log 时会进行这些操作
初始化成功后，向 199 文件描述符写四字节的值，告诉 afl-fuzz，fork server 启动成功
进入 __afl_fork_wait_loop 循环
- 从 198 文件描述符读四字节的值（代表 afl-fuzz 命令 fork server 新 fork 出一个进程来进行新的测试）
- fork 出子进程并执行测试用例，fork server 将子进程的 pid 写入状态管道告诉 afl-fuzz
- fork server 等待子进程运行结束，将结果保存至 __afl_temp 并写入状态管道告诉 afl-fuzz
- 进行下一次循环
如果共享内存已经被设置好了，则直接跳到 __afl_store 的逻辑
- 将上一个桩点 __afl_prev_loc 的值和当前桩点的值（R(MAP_SIZE)）进行异或，使共享内存中以该异或结果为下标的槽的值加一，然后将 __afl_prev_loc 设为 R(MAP_SIZE) >> 1