26 Nov 2016

QEMU TCG frontend

文章欢迎转载,但转载时请保留本段文字,并置于文章的顶部
Author: derek0883@gmail.com
本文原文地址:http://www.gorecursion.com/virtualization/2016/11/26/qemu-tcg1.html

Even if you are running QEMU on an x86 CPU, you can still emulate a PowerPC or another type of machine. In this case, the QEMU TCG frontend translates the PPC instructions into an intermediate RISC-like representation. The TCG backend then “compiles” those intermediate instructions into x86 instructions.

TCG frontend

The following figure shows how TCG works. Figure from this slide

QEMU TCG

TCG frontend experiment

Download PowerPC Cirros to do this experiment.

# Log items (comma separated):
# out_asm         show generated host assembly code for each compiled TB
# in_asm          show target assembly code for each compiled TB
# op              show micro ops for each compiled TB

$ ./ppc-softmmu/qemu-system-ppc -monitor stdio  images/cirros-0.3.4-powerpc-disk.img -trace events=/tmp/events -d in_asm,op -D /tmp/qemu_ppc.log

Open /tmp/qemu_ppc.log you will see:

IN: 
0xfff02520:  li      r0,0
0xfff02524:  lis     r3,-32768
0xfff02528:  add.    r3,r3,r3
0xfff0252c:  beq-    0xfff02538

OP:
 ld_i32 tmp0,env,$0xfffffffffffffff8
 movi_i32 tmp1,$0x0
 brcond_i32 tmp0,tmp1,ne,$L0

 ---- fff02520
 movi_i32 r0,$0x0   # li r0, 0

 ---- fff02524
 movi_i32 r3,$0xffffffff80000000  # lis r3, -32768(80000000)

TCG translates PowerPC instructions into TCG RISC-like instructions. The full list of TCG instruction descriptions can be found in the tcg/README file or in tcg/tcg-opc.h. I didn’t dive into the detailed meaning of each instruction. People who want to port QEMU to emulate a new processor need to deal with the QEMU frontend.

QEMU calls gen_intermediate_code to translate guest binary code into TCG intermediate code. I used GDB to check the call trace:

$ gdb ./ppc-softmmu/qemu-system-ppc
...
(gdb) file ./ppc-softmmu/qemu-system-ppc 
Reading symbols from ./ppc-softmmu/qemu-system-ppc...done.
(gdb) b gen_intermediate_code
Breakpoint 1 at 0x380b79: file qemu-2.8.0-rc0/target-ppc/translate.c, line 6986.
(gdb) r -monitor stdio  images/cirros-0.3.4-powerpc-disk.img -trace events=/tmp/events -d in_asm,op -D /tmp/qemu_ppc.log
Starting program: qemu-2.8.0-rc0/ppc-softmmu/qemu-system-ppc 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7ffff1ee4700 (LWP 4254)]
[New Thread 0x7ffff16e3700 (LWP 4255)]
[New Thread 0x7fffe9ffe700 (LWP 4256)]
[Switching to Thread 0x7fffe9ffe700 (LWP 4256)]

Thread 4 "qemu-system-ppc" hit Breakpoint 1, gen_intermediate_code (env=0x7ffff7e22290, tb=0x7ffff056c010)
    at qemu-2.8.0-rc0/target-ppc/translate.c:6986
6986	{
(gdb) bt
#0  gen_intermediate_code  at qemu-2.8.0-rc0/target-ppc/translate.c:6986
#1  0x00005555557808ac in tb_gen_code at qemu-2.8.0-rc0/translate-all.c:1311
#2  0x0000555555782925 in tb_find  at qemu-2.8.0-rc0/cpu-exec.c:346
#3  0x000055555578311f in cpu_exec at qemu-2.8.0-rc0/cpu-exec.c:637
#4  0x00005555557c46a6 in tcg_cpu_exec at qemu-2.8.0-rc0/cpus.c:1117
#5  0x00005555557c48ea in qemu_tcg_cpu_thread_fn  at qemu-2.8.0-rc0/cpus.c:1197
#6  0x00007ffff65746fa in start_thread  at pthread_create.c:333
#7  0x00007ffff62aab5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

In vl.c, QEMU’s main function calls cpu_ppc_init -> qemu_init_vcpu, which then creates the TCG thread running qemu_tcg_cpu_thread_fn.

TCG frontend Internal

In order to further study how the TCG frontend works, I made the following changes:

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
@@ -39,7 +39,7 @@
 #define GDBSTUB_SINGLE_STEP 0x4
 
 /* Include definitions for instructions classes and implementations flags */
-//#define PPC_DEBUG_DISAS
-//#define DO_PPC_STATISTICS
+#define PPC_DEBUG_DISAS
+#define DO_PPC_STATISTICS

diff --git a/disas/ppc.c b/disas/ppc.c
@@ -5312,6 +5312,7 @@ print_insn_powerpc (bfd_vma memaddr,
       if (invalid)
        continue;
 
+     (*info->fprintf_func) (info->stream, ".long 0x%lx: ", insn);
       /* The instruction is valid.  */

Then re-compile source code, and run

./ppc-softmmu/qemu-system-ppc -monitor stdio  images/cirros-0.3.4-powerpc-disk.img -trace events=/tmp/events -d in_asm,op -D /tmp/qemu_ppc.log

Now I get the following information in the qemu_ppc.log file:

----------------
----------------
nip=fff00100 super=3 ir=0
translate opcode 48002420 (12 10 10 00) (big)
IN: 
0xfff00100:  .long 0x48002420: b       0xfff02520

OP:
 ld_i32 tmp0,env,$0xfffffffffffffff8
 movi_i32 tmp1,$0x0
 brcond_i32 tmp0,tmp1,ne,$L0

 ---- fff00100  # <---- TCG intermediate output
 movi_i32 nip,$0xfffffffffff02520
 exit_tb $0x0
 set_label $L0
 exit_tb $0x7ffff05ed013

With the above information, I was able to easily figure out how the instruction was translated by TCG. For example, at address 0xfff00100 the PPC binary instruction is .long 0x48002420, and its opcode fields are decoded as (12 10 10 00) according to the PPC standard. 1. It is an unconditional branch instruction in PPC, b 0xfff02520. 2. The TCG frontend translates it to movi_i32 nip,$0xfffffffffff02520, which just sets nip to 0xfff02520; for movi_i32, the high 32 bits are ignored. For target-ppc, nip is the next instruction pointer, so the next instruction starts at 0xfff02520, which is equivalent to b 0xfff02520.

// target-ppc/translate.c
LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n",
          ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
          opc3(ctx.opcode), opc4(ctx.opcode),
          ctx.le_mode ? "little" : "big");
ctx.nip += 4;
table = env->opcodes;
handler = table[opc1(ctx.opcode)];
...
(*(handler->handler))(&ctx);

For this instruction. 0x48002420: b 0xfff02520, the opc1(ctx.opcode) is 0x12.

// target-ppc/translate.c
static opcode_t opcodes[] = {
...
GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
...
}

/*
 * Convenience wrapper around GEN_OPCODE: forwards all six arguments
 * unchanged and supplies PPC_NONE as the second type argument.
 */
#define GEN_HANDLER(name, opc1, opc2, opc3, inval, type) \
GEN_OPCODE(name, opc1, opc2, opc3, inval, type, PPC_NONE)

/*
 * Expands to a designated initializer for one entry of the opcode table:
 *   - .opc1/.opc2/.opc3 come from op1/op2/op3, .opc4 is fixed to 0xff;
 *   - the nested .handler struct records invl as .inval1, the two type
 *     arguments as .type/.type2, and the address of the translation
 *     function gen_<name> (formed by token pasting) as .handler;
 *   - .oname is the stringified instruction name.
 */
#define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)                    \
{                                                                             \
    .opc1 = op1,                                                              \
    .opc2 = op2,                                                              \
    .opc3 = op3,                                                              \
    .opc4 = 0xff,                                                             \
    .handler = {                                                              \
        .inval1  = invl,                                                      \
        .type = _typ,                                                         \
        .type2 = _typ2,                                                       \
        .handler = &gen_##name,                                               \
    },                                                                        \
    .oname = stringify(name),                                                 \
}

So the handler for this instruction would be:

// target-ppc/translate.c
/*
 * Translation handler for the PowerPC "b" (branch) instruction, registered
 * in the opcode table via GEN_HANDLER(b, 0x12, ...).
 *
 * ctx: translation context; ctx->opcode holds the instruction word and
 *      ctx->nip has already been advanced by 4 by the translation loop,
 *      so ctx->nip - 4 is the address of this branch instruction.
 *
 * Emits TCG ops through gen_setlr(), gen_update_cfar() and gen_goto_tb();
 * returns nothing.
 */
static void gen_b(DisasContext *ctx)
{
    target_ulong li, target;

    ctx->exception = POWERPC_EXCP_BRANCH;
    /* sign extend LI: the xor/subtract pair treats bit 25 as the sign bit */
    li = LI(ctx->opcode);
    li = (li ^ 0x02000000) - 0x02000000;
    if (likely(AA(ctx->opcode) == 0)) {
        /* AA clear: displacement is relative to this instruction's address */
        target = ctx->nip + li - 4;
    } else {
        /* AA set: the displacement is used as the target address itself */
        target = li;
    }
    if (LK(ctx->opcode)) {
        /* LK set: hand the address of the following instruction to gen_setlr */
        gen_setlr(ctx, ctx->nip);
    }
    gen_update_cfar(ctx, ctx->nip - 4);
    gen_goto_tb(ctx, 0, target);
}

gen_b calls TCG generic functions to generate platform-independent TCG instructions.

/*
 * Emit the TCG ops that end the current translation block with a jump to
 * the guest address `dest`.
 *
 * ctx:  translation context of the instruction being translated
 * n:    value passed to tcg_gen_goto_tb() and added to the TB pointer given
 *       to tcg_gen_exit_tb()
 *       (NOTE(review): presumably selects one of the TB's jump slots - confirm)
 * dest: guest destination address; its low two bits are cleared before it
 *       is written to cpu_nip
 */
static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
{
    /* When NARROW_MODE(ctx) holds, keep only the low 32 bits of dest. */
    if (NARROW_MODE(ctx)) {
        dest = (uint32_t) dest;
    }
    if (use_goto_tb(ctx, dest)) {
        /*
         * use_goto_tb() accepted this destination: emit goto_tb, set nip,
         * and exit with the TB pointer tagged with n.
         * (NOTE(review): presumably this enables direct TB chaining - confirm)
         */
        tcg_gen_goto_tb(n);
        tcg_gen_movi_tl(cpu_nip, dest & ~3);
        tcg_gen_exit_tb((uintptr_t)ctx->tb + n);
    } else {
        /* Otherwise just set nip and leave the TB with exit value 0. */
        tcg_gen_movi_tl(cpu_nip, dest & ~3);
        if (unlikely(ctx->singlestep_enabled)) {
            /*
             * Branch/single stepping combined with a pending branch or
             * trace exception: generate a trace exception at dest.
             */
            if ((ctx->singlestep_enabled &
                (CPU_BRANCH_STEP | CPU_SINGLE_STEP)) &&
                (ctx->exception == POWERPC_EXCP_BRANCH ||
                 ctx->exception == POWERPC_EXCP_TRACE)) {
                gen_exception_nip(ctx, POWERPC_EXCP_TRACE, dest);
            }
            /* gdbstub single stepping: generate a debug exception. */
            if (ctx->singlestep_enabled & GDBSTUB_SINGLE_STEP) {
                gen_debug_exception(ctx);
            }
        }
        tcg_gen_exit_tb(0);
    }
}

All tcg_gen_xxx functions are defined in tcg/tcg-op.c; you can find a full list of descriptions here.

So if we need to port QEMU to emulate a new processor, we need to decode the binary instructions according to that processor's specification and then fill in the handler table; the handler for each instruction will call tcg_gen_xx functions to generate TCG intermediate code. If this new processor is very powerful and provides some complex functions, e.g. encrypting/decrypting a block of data with a single instruction, you need to implement the equivalent function by calling multiple tcg_gen_xx functions. After that, the TCG backend will “compile” that code to native code.