368
技術社區[雲棲]
jvm開發筆記2—java反匯編器
作者:王智通
這兩天在class文件解析器的基礎上, 加上了java反匯編的功能。反匯編器是指令解釋器的基礎,通過編寫反匯編器可以熟悉jvm的指令系統, 不過jvm的指令一共有201個,反匯編過程基本就是個體力活。在《java虛擬機規範》中對每一條指令都有了詳細的描述,下面說說我是如何解析bytecode的:
一個java文件經過javac編譯後會生成class格式文件, 在class格式中method字段裏會有Code屬性,Code屬性包含了java的指令碼和長度。 首先用class解析器將指令碼提取出來, 舉個例子:
test.java
// Helper class holding one int field initialised to 6; the test class below does not reference it.
class aa {
int a = 6;
};
// Sample program: its compiled class file is the input disassembled in this article.
public class test {
// Prints "hehe" five times (loop counter i runs from 0 to 4).
public static void main(String args[]) {
int i = 0;
for (i = 0; i < 5; i++)
System.out.println("hehe");
}
}
我們用class文件解析器把test對應的bytecode打印出來:
len: 5
0x2a0xb70x00x10xb1
這一串bytecode為:0x2a 0xb7 0x00 0x01 0xb1(打印時字節之間沒有分隔符), 長度是5個字節。
對照《java虛擬機規範》我們來一步步手工解析:
0x2a代表aload_0指令, 它將本地局部變量中的第一個變量壓入到堆棧裏。這個指令本身長度就是一個字節,沒有參數, 因此0x2a的解析就非常簡單, 直接在屏幕打印出aload_0即可:
printf("%s\n", symbol);
0xb7代表invokespecial 它用來調用超類構造方法,實例初始化方法, 私有方法。它的用法如下:
invokespecial indexbyte1 indexbyte2,indexbyte1和indexbyte2各占一個字節,用(indexbyte1 << 8) | indexbyte2來構建一個常量池中的索引。每個jvm指令本身都占用一個字節,加上它的兩個參數, invokespecial語句它將占用3個字節空間。 所以它的解析算法如下:
u2 index;
index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2));
printf("%s #%x\n", symbol, index);
注意0xb7解析完後,我們要跳過3個字節的地址,那麼就是0xb1了, 它是return指令,沒有參數,因此它的解析方法跟aload_0一樣:
printf("%s\n", symbol);
以上是我們手工解析的過程, 但是jvm有201條指令, 我們需要建立一個合適的數據結構:
/*
 * Callback that handles one instruction.
 *   opcode_len - the instruction length stored in its table entry
 *   symbol     - the instruction mnemonic
 *   base       - pointer to the instruction's opcode byte; operands follow it
 */
typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base);
/*
 * One entry of the opcode table. Entries are written with positional
 * initializers, so the member order below must not change.
 */
typedef struct bytecode_st {
u2 opcode; // JVM opcode value
u2 opcode_len; // total instruction length in bytes, operands included
char symbol[OPCODE_SYMBOL_LEN]; // mnemonic of the instruction
interp_func func; // callback that decodes the instruction
}BYTECODE;
我們可以直接建立一個大的BYTECODE數組:
/*
 * Opcode dispatch table, indexed by opcode value (entry N describes opcode N).
 *
 * Each entry is {opcode, total length in bytes, mnemonic, callback}.
 * The length includes the opcode byte plus its fixed operands as laid out in
 * the JVM specification's instruction set chapter, so that a dispatcher which
 * advances by opcode_len lands on the next opcode.
 *
 * tableswitch, lookupswitch and wide have operand-dependent lengths that no
 * static table can express. They are listed with length 1 (opcode byte only);
 * their real length has to be computed from the instruction stream, otherwise
 * decoding loses sync after them.
 */
BYTECODE jvm_byte_code[OPCODE_LEN] = {
    /* constants */
    {0x00, 1, "nop", jvm_interp_nop},
    {0x01, 1, "aconst_null", jvm_interp_aconst_null},
    {0x02, 1, "iconst_m1", jvm_interp_iconst_m1},
    {0x03, 1, "iconst_0", jvm_interp_iconst_0},
    {0x04, 1, "iconst_1", jvm_interp_iconst_1},
    {0x05, 1, "iconst_2", jvm_interp_iconst_2},
    {0x06, 1, "iconst_3", jvm_interp_iconst_3},
    {0x07, 1, "iconst_4", jvm_interp_iconst_4},
    {0x08, 1, "iconst_5", jvm_interp_iconst_5},
    {0x09, 1, "lconst_0", jvm_interp_lconst_0},
    {0x0a, 1, "lconst_1", jvm_interp_lconst_1},
    {0x0b, 1, "fconst_0", jvm_interp_fconst_0},
    {0x0c, 1, "fconst_1", jvm_interp_fconst_1},
    {0x0d, 1, "fconst_2", jvm_interp_fconst_2},
    {0x0e, 1, "dconst_0", jvm_interp_dconst_0},
    {0x0f, 1, "dconst_1", jvm_interp_dconst_1},
    {0x10, 2, "bipush", jvm_interp_bipush},          /* + byte */
    {0x11, 3, "sipush", jvm_interp_sipush},          /* + 2-byte short */
    {0x12, 2, "ldc", jvm_interp_ldc},                /* + 1-byte index */
    {0x13, 3, "ldc_w", jvm_interp_ldc_w},            /* + 2-byte index */
    {0x14, 3, "ldc2_w", jvm_interp_ldc2_w},          /* + 2-byte index */

    /* loads */
    {0x15, 2, "iload", jvm_interp_iload},            /* + 1-byte local index */
    {0x16, 2, "lload", jvm_interp_lload},
    {0x17, 2, "fload", jvm_interp_fload},
    {0x18, 2, "dload", jvm_interp_dload},
    {0x19, 2, "aload", jvm_interp_aload},
    {0x1a, 1, "iload_0", jvm_interp_iload_0},
    {0x1b, 1, "iload_1", jvm_interp_iload_1},
    {0x1c, 1, "iload_2", jvm_interp_iload_2},
    {0x1d, 1, "iload_3", jvm_interp_iload_3},
    {0x1e, 1, "lload_0", jvm_interp_lload_0},
    {0x1f, 1, "lload_1", jvm_interp_lload_1},
    {0x20, 1, "lload_2", jvm_interp_lload_2},
    {0x21, 1, "lload_3", jvm_interp_lload_3},
    {0x22, 1, "fload_0", jvm_interp_fload_0},
    {0x23, 1, "fload_1", jvm_interp_fload_1},
    {0x24, 1, "fload_2", jvm_interp_fload_2},
    {0x25, 1, "fload_3", jvm_interp_fload_3},
    {0x26, 1, "dload_0", jvm_interp_dload_0},
    {0x27, 1, "dload_1", jvm_interp_dload_1},
    {0x28, 1, "dload_2", jvm_interp_dload_2},
    {0x29, 1, "dload_3", jvm_interp_dload_3},
    {0x2a, 1, "aload_0", jvm_interp_aload_0},
    {0x2b, 1, "aload_1", jvm_interp_aload_1},
    {0x2c, 1, "aload_2", jvm_interp_aload_2},
    {0x2d, 1, "aload_3", jvm_interp_aload_3},
    {0x2e, 1, "iaload", jvm_interp_iaload},
    {0x2f, 1, "laload", jvm_interp_laload},
    {0x30, 1, "faload", jvm_interp_faload},
    {0x31, 1, "daload", jvm_interp_daload},
    {0x32, 1, "aaload", jvm_interp_aaload},
    {0x33, 1, "baload", jvm_interp_baload},
    {0x34, 1, "caload", jvm_interp_caload},
    {0x35, 1, "saload", jvm_interp_saload},

    /* stores */
    {0x36, 2, "istore", jvm_interp_istore},          /* + 1-byte local index */
    {0x37, 2, "lstore", jvm_interp_lstore},
    {0x38, 2, "fstore", jvm_interp_fstore},
    {0x39, 2, "dstore", jvm_interp_dstore},
    {0x3a, 2, "astore", jvm_interp_astore},
    {0x3b, 1, "istore_0", jvm_interp_istore_0},
    {0x3c, 1, "istore_1", jvm_interp_istore_1},
    {0x3d, 1, "istore_2", jvm_interp_istore_2},
    {0x3e, 1, "istore_3", jvm_interp_istore_3},
    {0x3f, 1, "lstore_0", jvm_interp_lstore_0},
    {0x40, 1, "lstore_1", jvm_interp_lstore_1},
    {0x41, 1, "lstore_2", jvm_interp_lstore_2},
    {0x42, 1, "lstore_3", jvm_interp_lstore_3},
    {0x43, 1, "fstore_0", jvm_interp_fstore_0},
    {0x44, 1, "fstore_1", jvm_interp_fstore_1},
    {0x45, 1, "fstore_2", jvm_interp_fstore_2},
    {0x46, 1, "fstore_3", jvm_interp_fstore_3},
    {0x47, 1, "dstore_0", jvm_interp_dstore_0},
    {0x48, 1, "dstore_1", jvm_interp_dstore_1},
    {0x49, 1, "dstore_2", jvm_interp_dstore_2},
    {0x4a, 1, "dstore_3", jvm_interp_dstore_3},
    {0x4b, 1, "astore_0", jvm_interp_astore_0},
    {0x4c, 1, "astore_1", jvm_interp_astore_1},
    {0x4d, 1, "astore_2", jvm_interp_astore_2},
    {0x4e, 1, "astore_3", jvm_interp_astore_3},
    {0x4f, 1, "iastore", jvm_interp_iastore},
    {0x50, 1, "lastore", jvm_interp_lastore},
    {0x51, 1, "fastore", jvm_interp_fastore},
    {0x52, 1, "dastore", jvm_interp_dastore},
    {0x53, 1, "aastore", jvm_interp_aastore},
    {0x54, 1, "bastore", jvm_interp_bastore},
    {0x55, 1, "castore", jvm_interp_castore},
    {0x56, 1, "sastore", jvm_interp_sastore},

    /* operand stack */
    {0x57, 1, "pop", jvm_interp_pop},
    {0x58, 1, "pop2", jvm_interp_pop2},
    {0x59, 1, "dup", jvm_interp_dup},
    {0x5a, 1, "dup_x1", jvm_interp_dup_x1},
    {0x5b, 1, "dup_x2", jvm_interp_dup_x2},
    {0x5c, 1, "dup2", jvm_interp_dup2},
    {0x5d, 1, "dup2_x1", jvm_interp_dup2_x1},
    {0x5e, 1, "dup2_x2", jvm_interp_dup2_x2},
    {0x5f, 1, "swap", jvm_interp_swap},

    /* arithmetic and logic */
    {0x60, 1, "iadd", jvm_interp_iadd},
    {0x61, 1, "ladd", jvm_interp_ladd},
    {0x62, 1, "fadd", jvm_interp_fadd},
    {0x63, 1, "dadd", jvm_interp_dadd},
    {0x64, 1, "isub", jvm_interp_isub},
    {0x65, 1, "lsub", jvm_interp_lsub},
    {0x66, 1, "fsub", jvm_interp_fsub},
    {0x67, 1, "dsub", jvm_interp_dsub},
    {0x68, 1, "imul", jvm_interp_imul},
    {0x69, 1, "lmul", jvm_interp_lmul},
    {0x6a, 1, "fmul", jvm_interp_fmul},
    {0x6b, 1, "dmul", jvm_interp_dmul},
    {0x6c, 1, "idiv", jvm_interp_idiv},
    {0x6d, 1, "ldiv", jvm_interp_ldiv},
    {0x6e, 1, "fdiv", jvm_interp_fdiv},
    {0x6f, 1, "ddiv", jvm_interp_ddiv},
    {0x70, 1, "irem", jvm_interp_irem},
    {0x71, 1, "lrem", jvm_interp_lrem},
    {0x72, 1, "frem", jvm_interp_frem},
    {0x73, 1, "drem", jvm_interp_drem},
    {0x74, 1, "ineg", jvm_interp_ineg},
    {0x75, 1, "lneg", jvm_interp_lneg},
    {0x76, 1, "fneg", jvm_interp_fneg},
    {0x77, 1, "dneg", jvm_interp_dneg},
    {0x78, 1, "ishl", jvm_interp_ishl},
    {0x79, 1, "lshl", jvm_interp_lshl},
    {0x7a, 1, "ishr", jvm_interp_ishr},
    {0x7b, 1, "lshr", jvm_interp_lshr},
    {0x7c, 1, "iushr", jvm_interp_iushr},
    {0x7d, 1, "lushr", jvm_interp_lushr},
    {0x7e, 1, "iand", jvm_interp_iand},
    {0x7f, 1, "land", jvm_interp_land},
    {0x80, 1, "ior", jvm_interp_ior},
    {0x81, 1, "lor", jvm_interp_lor},
    {0x82, 1, "ixor", jvm_interp_ixor},
    {0x83, 1, "lxor", jvm_interp_lxor},
    {0x84, 3, "iinc", jvm_interp_iinc},              /* + local index + const */

    /* conversions */
    {0x85, 1, "i2l", jvm_interp_i2l},
    {0x86, 1, "i2f", jvm_interp_i2f},
    {0x87, 1, "i2d", jvm_interp_i2d},
    {0x88, 1, "l2i", jvm_interp_l2i},
    {0x89, 1, "l2f", jvm_interp_l2f},
    {0x8a, 1, "l2d", jvm_interp_l2d},
    {0x8b, 1, "f2i", jvm_interp_f2i},
    {0x8c, 1, "f2l", jvm_interp_f2l},
    {0x8d, 1, "f2d", jvm_interp_f2d},
    {0x8e, 1, "d2i", jvm_interp_d2i},
    {0x8f, 1, "d2l", jvm_interp_d2l},
    {0x90, 1, "d2f", jvm_interp_d2f},
    {0x91, 1, "i2b", jvm_interp_i2b},
    {0x92, 1, "i2c", jvm_interp_i2c},
    {0x93, 1, "i2s", jvm_interp_i2s},

    /* comparisons and branches */
    {0x94, 1, "lcmp", jvm_interp_lcmp},
    {0x95, 1, "fcmpl", jvm_interp_fcmpl},
    {0x96, 1, "fcmpg", jvm_interp_fcmpg},
    {0x97, 1, "dcmpl", jvm_interp_dcmpl},
    {0x98, 1, "dcmpg", jvm_interp_dcmpg},
    {0x99, 3, "ifeq", jvm_interp_ifeq},              /* + 2-byte branch offset */
    {0x9a, 3, "ifne", jvm_interp_ifne},
    {0x9b, 3, "iflt", jvm_interp_iflt},
    {0x9c, 3, "ifge", jvm_interp_ifge},
    {0x9d, 3, "ifgt", jvm_interp_ifgt},
    {0x9e, 3, "ifle", jvm_interp_ifle},
    {0x9f, 3, "if_icmpeq", jvm_interp_if_icmpeq},
    {0xa0, 3, "if_icmpne", jvm_interp_if_icmpne},
    {0xa1, 3, "if_icmplt", jvm_interp_if_icmplt},
    {0xa2, 3, "if_icmpge", jvm_interp_if_icmpge},
    {0xa3, 3, "if_icmpgt", jvm_interp_if_icmpgt},
    {0xa4, 3, "if_icmple", jvm_interp_if_icmple},
    {0xa5, 3, "if_acmpeq", jvm_interp_if_acmpeq},
    {0xa6, 3, "if_acmpne", jvm_interp_if_acmpne},

    /* control */
    {0xa7, 3, "goto", jvm_interp_goto},              /* + 2-byte branch offset */
    {0xa8, 3, "jsr", jvm_interp_jsr},                /* + 2-byte branch offset */
    {0xa9, 2, "ret", jvm_interp_ret},                /* + 1-byte local index */
    {0xaa, 1, "tableswitch", jvm_interp_tableswitch},   /* variable length, see header */
    {0xab, 1, "lookupswitch", jvm_interp_lookupswitch}, /* variable length, see header */
    {0xac, 1, "ireturn", jvm_interp_ireturn},
    {0xad, 1, "lreturn", jvm_interp_lreturn},
    {0xae, 1, "freturn", jvm_interp_freturn},
    {0xaf, 1, "dreturn", jvm_interp_dreturn},
    {0xb0, 1, "areturn", jvm_interp_areturn},
    {0xb1, 1, "return", jvm_interp_return},

    /* references */
    {0xb2, 3, "getstatic", jvm_interp_getstatic},    /* + 2-byte constant pool index */
    {0xb3, 3, "putstatic", jvm_interp_putstatic},
    {0xb4, 3, "getfield", jvm_interp_getfield},
    {0xb5, 3, "putfield", jvm_interp_putfield},
    {0xb6, 3, "invokevirtual", jvm_interp_invokevirtual},
    {0xb7, 3, "invokespecial", jvm_interp_invokespecial},
    {0xb8, 3, "invokestatic", jvm_interp_invokestatic},
    {0xb9, 5, "invokeinterface", jvm_interp_invokeinterface}, /* + index, count, 0 */
    {0xba, 5, "invokedynamic", jvm_interp_invokedynamic},     /* + index, 0, 0 */
    {0xbb, 3, "new", jvm_interp_new},
    {0xbc, 2, "newarray", jvm_interp_newarray},      /* + 1-byte array type */
    {0xbd, 3, "anewarray", jvm_interp_anewarray},
    {0xbe, 1, "arraylength", jvm_interp_arraylength},
    {0xbf, 1, "athrow", jvm_interp_athrow},
    {0xc0, 3, "checkcast", jvm_interp_checkcast},
    {0xc1, 3, "instanceof", jvm_interp_instanceof},
    {0xc2, 1, "monitorenter", jvm_interp_monitorenter},
    {0xc3, 1, "monitorexit", jvm_interp_monitorexit},

    /* extended */
    {0xc4, 1, "wide", jvm_interp_wide},              /* variable length, see header */
    {0xc5, 4, "multianewarray", jvm_interp_multianewarray}, /* + index + dimensions */
    {0xc6, 3, "ifnull", jvm_interp_ifnull},
    {0xc7, 3, "ifnonnull", jvm_interp_ifnonnull},
    {0xc8, 5, "goto_w", jvm_interp_goto_w},          /* + 4-byte branch offset */
    {0xc9, 5, "jsr_w", jvm_interp_jsr_w},            /* + 4-byte branch offset */
};
每個jvm指令的指令碼就是數組的索引, 這樣就能找到指令對應的BYTECODE結構,通過調用其回調函數, 就可以進入具體的解析過程了。 這樣做的好處就是不用switch case一大堆分支了。
/*
 * Print an invokespecial instruction as "<symbol> #<index>".
 *
 * len    - instruction length from the opcode table (unused here)
 * symbol - mnemonic to print
 * base   - pointer to the opcode byte; the two following bytes hold the
 *          constant pool index, high byte first
 *
 * Returns 0.
 */
int jvm_interp_invokespecial(u2 len, char *symbol, void *base)
{
    /* Typed view of the instruction: arithmetic on void * is not standard C. */
    const u1 *insn = base;
    u2 index;

    (void)len;

    index = (u2)((insn[1] << 8) | insn[2]);
    printf("%s #%x\n", symbol, index);

    return 0;
}
/*
 * Print an aload_0 instruction: the mnemonic on a line of its own.
 * The instruction has no operands, so len and base are unused.
 *
 * Returns 0.
 */
int jvm_interp_aload_0(u2 len, char *symbol, void *base)
{
    (void)len;
    (void)base;

    printf("%s\n", symbol);

    return 0;
}
/*
 * Print a return instruction: the mnemonic on a line of its own.
 * The instruction has no operands, so len and base are unused.
 *
 * Returns 0.
 */
int jvm_interp_return(u2 len, char *symbol, void *base)
{
    (void)len;
    (void)base;

    printf("%s\n", symbol);

    return 0;
}
/*
 * Disassemble a run of JVM bytecode.
 *
 * base - first byte of the code
 * len  - number of code bytes
 *
 * Walks the code one instruction at a time: the opcode byte selects an entry
 * of jvm_byte_code, that entry's callback is invoked with a pointer to the
 * opcode byte, and the cursor advances by the entry's opcode_len.
 *
 * Returns 0 when the whole buffer was consumed, -1 when base is NULL, an
 * opcode has no usable table entry, or an instruction runs past the end.
 */
int __disass_bytecode(u1 *base, u2 len)
{
    const size_t table_slots = sizeof(jvm_byte_code) / sizeof(jvm_byte_code[0]);
    /* Same width as len: a narrower cursor wraps at 256 and never reaches len. */
    u2 offset = 0;

    if (base == NULL)
        return -1;

    while (offset < len) {
        u1 opcode = base[offset];
        u2 insn_len;

        /*
         * Reject opcodes outside the table and slots left zero-initialized:
         * a NULL callback would crash and a zero length would never advance.
         */
        if ((size_t)opcode >= table_slots
            || jvm_byte_code[opcode].func == NULL
            || jvm_byte_code[opcode].opcode_len == 0) {
            fprintf(stderr, "unknown opcode 0x%02x at offset %u\n",
                    (unsigned)opcode, (unsigned)offset);
            return -1;
        }

        insn_len = jvm_byte_code[opcode].opcode_len;

        /* The callback reads its operands, so they must lie inside the buffer. */
        if (insn_len > len - offset) {
            fprintf(stderr, "truncated instruction 0x%02x at offset %u\n",
                    (unsigned)opcode, (unsigned)offset);
            return -1;
        }

        jvm_byte_code[opcode].func(insn_len, jvm_byte_code[opcode].symbol,
                                   base + offset);
        offset += insn_len;
    }

    return 0;
}
目前這個反匯編器只能解析一小部分指令, 隨著開發的深入, 會慢慢補全的, 下面是反匯編test.class的結果:
diassember bytecode:
aload_0
invokespecial #1
return
-----------------------------
iconst_0
istore_1
iconst_0
istore_1
iload_1
iconst_5
if_icmpge 17
getstatic #2
ldc #3
invokevirtual #4
iinc 1 1
goto 0xfff0
return
java工具集中提供了javap, 可以反匯編java指令,本來是想山寨一個javap的, 但是現在對jvm整體結構還是不清晰,數據結構還不能很好的設計出來, 但是隨著對jvm的了解深入, 反匯編器會越來越成熟。
源碼下載地址:
https://www.cloud-sec.org/jvm.tgz
最後更新:2017-04-03 07:57:05