From cefdec0644294c91d204e628a7fa1ad2cf6a8e39 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 10 Dec 2008 19:35:18 -0600 Subject: [PATCH] Use udis on x86 --- .../disassembler/disassembler-docs.factor | 6 +- basis/tools/disassembler/disassembler.factor | 52 ++++------- basis/tools/disassembler/gdb/gdb.factor | 36 ++++++++ basis/tools/disassembler/gdb/tags.txt | 1 + basis/tools/disassembler/udis/udis.factor | 91 +++++++++++++++++++ 5 files changed, 148 insertions(+), 38 deletions(-) create mode 100644 basis/tools/disassembler/gdb/gdb.factor create mode 100644 basis/tools/disassembler/gdb/tags.txt create mode 100644 basis/tools/disassembler/udis/udis.factor diff --git a/basis/tools/disassembler/disassembler-docs.factor b/basis/tools/disassembler/disassembler-docs.factor index f03861a8ed..7d193d0aac 100644 --- a/basis/tools/disassembler/disassembler-docs.factor +++ b/basis/tools/disassembler/disassembler-docs.factor @@ -3,11 +3,11 @@ USING: help.markup help.syntax sequences.private ; HELP: disassemble { $values { "obj" "a word or a pair of addresses" } } -{ $description "Disassembles either a compiled word definition or an arbitrary memory range (in the case " { $snippet "obj" } " is a pair of integers) by attaching " { $snippet "gdb" } " to the Factor VM and capturing the output." } -{ $notes "In some cases the Factor compiler emits data inline with code, which can confuse " { $snippet "gdb" } ". This occurs in words which call " { $link dispatch } ", where the jump table addresses are compiled inline. Also on the ARM architecture, various pointers are often compiled inline, and the preceeding instruction jumps over the inline pinter." } ; +{ $description "Disassembles either a compiled word definition or an arbitrary memory range (in the case " { $snippet "obj" } " is a pair of integers)." } +{ $notes "In some cases the Factor compiler emits data inline with code, which can confuse the disassembler. 
This occurs in words which call " { $link dispatch } ", where the jump table addresses are compiled inline." } ; ARTICLE: "tools.disassembler" "Disassembling words" -"The " { $vocab-link "tools.disassembler" } " vocabulary integrates Factor with the GNU debugger (" { $snippet "gdb" } ") for viewing the assembly code generated by the compiler. It can be used on both Unix and Windows as long as a working copy of " { $snippet "gdb" } " is installed and available in the " { $snippet "PATH" } "." +"The " { $vocab-link "tools.disassembler" } " vocabulary provides support for disassembling compiled word definitions. It uses the " { $snippet "libudis86" } " library on x86-32 and x86-64, and " { $snippet "gdb" } " on PowerPC." { $subsection disassemble } ; ABOUT: "tools.disassembler" diff --git a/basis/tools/disassembler/disassembler.factor b/basis/tools/disassembler/disassembler.factor index 76e1f0f1b8..fac340845b 100644 --- a/basis/tools/disassembler/disassembler.factor +++ b/basis/tools/disassembler/disassembler.factor @@ -1,43 +1,25 @@ -! Copyright (C) 2008 Slava Pestov, Jorge Acereda Macia. +! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. 
-USING: io.files io words alien kernel math.parser alien.syntax -io.launcher system assocs arrays sequences namespaces make -qualified system math compiler.codegen.fixup -io.encodings.ascii accessors generic tr ; +USING: tr arrays sequences io words generic system combinators +vocabs.loader ; IN: tools.disassembler -: in-file ( -- path ) "gdb-in.txt" temp-file ; +GENERIC: disassemble ( obj -- ) -: out-file ( -- path ) "gdb-out.txt" temp-file ; +SYMBOL: disassembler-backend -GENERIC: make-disassemble-cmd ( obj -- ) - -M: word make-disassemble-cmd - word-xt code-format - 2array make-disassemble-cmd ; - -M: pair make-disassemble-cmd - in-file ascii [ - "attach " write - current-process-handle number>string print - "disassemble " write - [ number>string write bl ] each - ] with-file-writer ; - -M: method-spec make-disassemble-cmd - first2 method make-disassemble-cmd ; - -: gdb-binary ( -- string ) "gdb" ; - -: run-gdb ( -- lines ) - <process> - +closed+ >>stdin - out-file >>stdout - [ gdb-binary , "-x" , in-file , "-batch" , ] { } make >>command - try-process - out-file ascii file-lines ; +HOOK: disassemble* disassembler-backend ( from to -- lines ) TR: tabs>spaces "\t" "\s" ; -: disassemble ( obj -- ) - make-disassemble-cmd run-gdb - [ tabs>spaces ] map [ print ] each ; +M: pair disassemble first2 disassemble* [ tabs>spaces print ] each ; + +M: word disassemble word-xt 2array disassemble ; + +M: method-spec disassemble first2 method disassemble ; + +cpu { + { x86.32 [ "tools.disassembler.udis" ] } + { x86.64 [ "tools.disassembler.udis" ] } + { ppc [ "tools.disassembler.gdb" ] } +} case require diff --git a/basis/tools/disassembler/gdb/gdb.factor b/basis/tools/disassembler/gdb/gdb.factor new file mode 100644 index 0000000000..65d0e2f43a --- /dev/null +++ b/basis/tools/disassembler/gdb/gdb.factor @@ -0,0 +1,36 @@ +! Copyright (C) 2008 Slava Pestov, Jorge Acereda Macia. +! See http://factorcode.org/license.txt for BSD license. 
+USING: io.files io words alien kernel math.parser alien.syntax +io.launcher system assocs arrays sequences namespaces make +qualified system math io.encodings.ascii accessors +tools.disassembler ; +IN: tools.disassembler.gdb + +SINGLETON: gdb-disassembler + +: in-file ( -- path ) "gdb-in.txt" temp-file ; + +: out-file ( -- path ) "gdb-out.txt" temp-file ; + +: make-disassemble-cmd ( from to -- ) + in-file ascii [ + "attach " write + current-process-handle number>string print + "disassemble " write + [ number>string write bl ] bi@ + ] with-file-writer ; + +: gdb-binary ( -- string ) "gdb" ; + +: run-gdb ( -- lines ) + <process> + +closed+ >>stdin + out-file >>stdout + [ gdb-binary , "-x" , in-file , "-batch" , ] { } make >>command + try-process + out-file ascii file-lines ; + +M: gdb-disassembler disassemble* + make-disassemble-cmd run-gdb ; + +gdb-disassembler disassembler-backend set-global diff --git a/basis/tools/disassembler/gdb/tags.txt b/basis/tools/disassembler/gdb/tags.txt new file mode 100644 index 0000000000..6bf68304bb --- /dev/null +++ b/basis/tools/disassembler/gdb/tags.txt @@ -0,0 +1 @@ +unportable diff --git a/basis/tools/disassembler/udis/udis.factor b/basis/tools/disassembler/udis/udis.factor new file mode 100644 index 0000000000..113c07c8c3 --- /dev/null +++ b/basis/tools/disassembler/udis/udis.factor @@ -0,0 +1,91 @@ +! Copyright (C) 2008 Slava Pestov, Jorge Acereda Macia. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.disassembler namespaces combinators +alien alien.syntax alien.c-types lexer parser kernel +sequences layouts math math.parser system make fry arrays ; +IN: tools.disassembler.udis + +<< : & scan "c-library" get load-library dlsym parsed ; parsing >> + +<< +"libudis86" { + { [ os macosx? ] [ "libudis86.0.dylib" ] } + { [ os unix? ] [ "libudis86.so.0" ] } + { [ os winnt? 
] [ "libudis86.dll" ] } +} cond "cdecl" add-library +>> + +LIBRARY: libudis86 + +TYPEDEF: char[592] ud + +FUNCTION: void ud_translate_intel ( ud* u ) ; +FUNCTION: void ud_translate_att ( ud* u ) ; + +: UD_SYN_INTEL & ud_translate_intel ; inline +: UD_SYN_ATT & ud_translate_att ; inline +: UD_EOI -1 ; inline +: UD_INP_CACHE_SZ 32 ; inline +: UD_VENDOR_AMD 0 ; inline +: UD_VENDOR_INTEL 1 ; inline + +FUNCTION: void ud_init ( ud* u ) ; +FUNCTION: void ud_set_mode ( ud* u, uint8_t mode ) ; +FUNCTION: void ud_set_pc ( ud* u, ulonglong pc ) ; +FUNCTION: void ud_set_input_buffer ( ud* u, uint8_t* offset, size_t size ) ; +FUNCTION: void ud_set_vendor ( ud* u, uint vendor ) ; +FUNCTION: void ud_set_syntax ( ud* u, void* syntax ) ; +FUNCTION: void ud_input_skip ( ud* u, size_t size ) ; +FUNCTION: int ud_input_end ( ud* u ) ; +FUNCTION: uint ud_decode ( ud* u ) ; +FUNCTION: uint ud_disassemble ( ud* u ) ; +FUNCTION: char* ud_insn_asm ( ud* u ) ; +FUNCTION: void* ud_insn_ptr ( ud* u ) ; +FUNCTION: ulonglong ud_insn_off ( ud* u ) ; +FUNCTION: char* ud_insn_hex ( ud* u ) ; +FUNCTION: uint ud_insn_len ( ud* u ) ; +FUNCTION: char* ud_lookup_mnemonic ( int c ) ; + +: <ud> ( -- ud ) + "ud" <c-object> + dup ud_init + dup cell-bits ud_set_mode + dup UD_SYN_INTEL ud_set_syntax ; + +SINGLETON: udis-disassembler + +: buf/len ( from to -- buf len ) [ drop ] [ swap - ] 2bi ; + +: format-disassembly ( lines -- lines' ) + dup [ second length ] map supremum + '[ + [ + [ first >hex cell 2 * CHAR: 0 pad-left % ": " % ] + [ second _ CHAR: \s pad-right % " " % ] + [ third % ] + tri + ] "" make + ] map ; + +: (disassemble) ( ud -- lines ) + [ + dup '[ + _ ud_disassemble 0 = + [ f ] [ + _ + [ ud_insn_off ] + [ ud_insn_hex ] + [ ud_insn_asm ] + tri 3array , t + ] if + ] loop + ] { } make ; + +M: udis-disassembler disassemble* ( from to -- lines ) + [ <ud> ] 2dip { + [ drop ud_set_pc ] + [ buf/len ud_set_input_buffer ] + [ 2drop (disassemble) format-disassembly ] + } 3cleave ; + +udis-disassembler disassembler-backend 
set-global