juicebox_asm/
asm.rs

1// SPDX-License-Identifier: MIT
2//
3// Copyright (c) 2023, Johannes Stoelp <dev@memzero.de>
4
5//! The `x64` jit assembler.
6
7use crate::imm::Imm;
8use crate::mem::{AddrMode, Mem, Mem16, Mem32, Mem64, Mem8};
9use crate::reg::{Reg, Reg16, Reg32, Reg64, Reg8};
10use crate::Label;
11
/// Build the `REX` prefix byte (`0b0100_WRXB`).
///
/// * `w` - value of the `REX.W` bit.
/// * `r` - register index whose bit 3 becomes `REX.R`.
/// * `x` - register index whose bit 3 becomes `REX.X`.
/// * `b` - register index whose bit 3 becomes `REX.B`.
///
/// Only bit 3 of each register index is looked at; all other bits are
/// ignored.
const fn rex(w: bool, r: u8, x: u8, b: u8) -> u8 {
    let w_bit = (w as u8) << 3;
    let r_bit = ((r >> 3) & 1) << 2;
    let x_bit = ((x >> 3) & 1) << 1;
    let b_bit = (b >> 3) & 1;

    // Fixed upper nibble 0b0100 marks the byte as REX prefix.
    0x40 | w_bit | r_bit | x_bit | b_bit
}
20
/// Build the `ModR/M` byte from its three fields.
///
/// Layout is `mod[7:6] | reg[5:3] | rm[2:0]`. Each argument is truncated to
/// the width of its field, hence register indices >= 8 contribute only their
/// low three bits.
const fn modrm(mod_: u8, reg: u8, rm: u8) -> u8 {
    let mod_field = (mod_ & 0b11) << 6;
    let reg_field = (reg & 0b111) << 3;
    let rm_field = rm & 0b111;

    mod_field | reg_field | rm_field
}
25
/// Build the `SIB` byte from its three fields.
///
/// Layout is `scale[7:6] | index[5:3] | base[2:0]`. Each argument is
/// truncated to the width of its field, hence register indices >= 8
/// contribute only their low three bits.
const fn sib(scale: u8, index: u8, base: u8) -> u8 {
    let scale_field = (scale & 0b11) << 6;
    let index_field = (index & 0b111) << 3;
    let base_field = base & 0b111;

    scale_field | index_field | base_field
}
30
/// Jit assembler for `x64` machine code.
///
/// Encoded instructions are appended to an internal, growable byte buffer.
pub struct Asm {
    /// Machine code bytes emitted so far.
    buf: Vec<u8>,
}
35
36impl Asm {
37    /// Create a new `x64` jit assembler.
38    pub fn new() -> Asm {
39        // Some random default capacity.
40        let buf = Vec::with_capacity(1024);
41        Asm { buf }
42    }
43
44    /// Consume the assembler and get the emitted code.
45    pub fn into_code(self) -> Vec<u8> {
46        self.buf
47    }
48
49    /// Disassemble the code currently added to the assembler, using
50    /// [`ndisasm`](https://nasm.us/index.php) and print it to _stdout_. If
51    /// `ndisasm` is not available on the system this prints a warning and
52    /// becomes a nop.
53    ///
54    /// # Panics
55    ///
56    /// Panics if anything goes wrong with spawning, writing to or reading from
57    /// the `ndisasm` child process.
58    pub fn disasm(&self) {
59        crate::disasm::disasm(&self.buf);
60    }
61
62    /// Emit a slice of bytes.
63    pub(crate) fn emit(&mut self, bytes: &[u8]) {
64        self.buf.extend_from_slice(bytes);
65    }
66
67    /// Emit a slice of optional bytes.
68    fn emit_optional(&mut self, bytes: &[Option<u8>]) {
69        for byte in bytes.iter().filter_map(|&b| b) {
70            self.buf.push(byte);
71        }
72    }
73
74    /// Emit a slice of bytes at `pos`.
75    ///
76    /// # Panics
77    ///
78    /// Panics if [pos..pos+len] indexes out of bound of the underlying code buffer.
79    fn emit_at(&mut self, pos: usize, bytes: &[u8]) {
80        if let Some(buf) = self.buf.get_mut(pos..pos + bytes.len()) {
81            buf.copy_from_slice(bytes);
82        } else {
83            unimplemented!();
84        }
85    }
86
87    /// Bind the [Label] to the current location.
88    pub fn bind(&mut self, label: &mut Label) {
89        // Bind the label to the current offset.
90        label.bind(self.buf.len());
91
92        // Resolve any pending relocations for the label.
93        self.resolve(label);
94    }
95
96    /// If the [Label] is bound, patch any pending relocation.
97    fn resolve(&mut self, label: &mut Label) {
98        if let Some(loc) = label.location() {
99            // Resolve any pending relocations for the label.
100            for off in label.offsets_mut().drain() {
101                // Displacement is relative to the next instruction following the jump.
102                let disp = {
103                    let loc = isize::try_from(loc).expect("loc does not fit into isize");
104                    let off = isize::try_from(off).expect("off does not fit into isize");
105
106                    // We record the offset to patch at the first byte of the disp32
107                    // therefore we need to account for that in the disp computation.
108                    loc - (off + 4/* account for the disp32 */)
109                };
110
111                // For now we only support disp32 as label location.
112                let disp32 = i32::try_from(disp).expect("Label offset did not fit into i32");
113
114                // Patch the relocation with the disp32.
115                self.emit_at(off, &disp32.to_ne_bytes());
116            }
117        }
118    }
119
120    // -- Encode utilities.
121
122    /// Encode an register-register instruction.
123    pub(crate) fn encode_rr<T: Reg>(&mut self, opc: &[u8], op1: T, op2: T)
124    where
125        Self: EncodeRR<T>,
126    {
127        // MR operand encoding.
128        //   op1 -> modrm.rm
129        //   op2 -> modrm.reg
130        let modrm = modrm(
131            0b11,      /* mod */
132            op2.idx(), /* reg */
133            op1.idx(), /* rm */
134        );
135
136        let prefix = <Self as EncodeRR<T>>::legacy_prefix();
137        let rex = <Self as EncodeRR<T>>::rex(op1, op2);
138
139        self.emit_optional(&[prefix, rex]);
140        self.emit(opc);
141        self.emit(&[modrm]);
142    }
143
144    /// Encode an offset-immediate instruction.
145    /// Register idx is encoded in the opcode.
146    pub(crate) fn encode_oi<T: Reg, U: Imm>(&mut self, opc: u8, op1: T, op2: U)
147    where
148        Self: EncodeR<T>,
149    {
150        let opc = opc + (op1.idx() & 0b111);
151        let prefix = <Self as EncodeR<T>>::legacy_prefix();
152        let rex = <Self as EncodeR<T>>::rex(op1);
153
154        self.emit_optional(&[prefix, rex]);
155        self.emit(&[opc]);
156        self.emit(op2.bytes());
157    }
158
159    /// Encode a register instruction.
160    pub(crate) fn encode_r<T: Reg>(&mut self, opc: u8, opc_ext: u8, op1: T)
161    where
162        Self: EncodeR<T>,
163    {
164        // M operand encoding.
165        //   op1           -> modrm.rm
166        //   opc extension -> modrm.reg
167        let modrm = modrm(
168            0b11,      /* mod */
169            opc_ext,   /* reg */
170            op1.idx(), /* rm */
171        );
172
173        let prefix = <Self as EncodeR<T>>::legacy_prefix();
174        let rex = <Self as EncodeR<T>>::rex(op1);
175
176        self.emit_optional(&[prefix, rex]);
177        self.emit(&[opc, modrm]);
178    }
179
180    /// Encode a memory operand instruction.
181    pub(crate) fn encode_m<T: Mem>(&mut self, opc: u8, opc_ext: u8, op1: T)
182    where
183        Self: EncodeM<T>,
184    {
185        // M operand encoding.
186        //   op1 -> modrm.rm
187        let (mode, rm) = match op1.mode() {
188            AddrMode::Indirect => {
189                assert!(!op1.base().need_sib() && !op1.base().is_pc_rel());
190                (0b00, op1.base().idx())
191            }
192            AddrMode::IndirectDisp => {
193                assert!(!op1.base().need_sib());
194                (0b10, op1.base().idx())
195            }
196            AddrMode::IndirectBaseIndex => {
197                assert!(!op1.base().is_pc_rel());
198                // Using rsp as index register is interpreted as just base w/o offset.
199                //   https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
200                // Disallow this case, as guard for the user.
201                assert!(!matches!(op1.index(), Reg64::rsp));
202                (0b00, 0b100)
203            }
204        };
205
206        let modrm = modrm(
207            mode,    /* mode */
208            opc_ext, /* reg */
209            rm,      /* rm */
210        );
211
212        let prefix = <Self as EncodeM<T>>::legacy_prefix();
213        let rex = <Self as EncodeM<T>>::rex(&op1);
214
215        self.emit_optional(&[prefix, rex]);
216        self.emit(&[opc, modrm]);
217        match op1.mode() {
218            AddrMode::Indirect => {}
219            AddrMode::IndirectDisp => self.emit(&op1.disp().to_ne_bytes()),
220            AddrMode::IndirectBaseIndex => {
221                self.emit(&[sib(0, op1.index().idx(), op1.base().idx())])
222            }
223        }
224    }
225
226    /// Encode a memory-immediate instruction.
227    pub(crate) fn encode_mi<M: Mem, T: Imm>(&mut self, opc: u8, opc_ext: u8, op1: M, op2: T)
228    where
229        Self: EncodeM<M>,
230    {
231        // MI operand encoding.
232        //   op1 -> modrm.rm
233        //   op2 -> imm
234        let (mode, rm) = match op1.mode() {
235            AddrMode::Indirect => {
236                assert!(!op1.base().need_sib() && !op1.base().is_pc_rel());
237                (0b00, op1.base().idx())
238            }
239            AddrMode::IndirectDisp => {
240                assert!(!op1.base().need_sib());
241                (0b10, op1.base().idx())
242            }
243            AddrMode::IndirectBaseIndex => {
244                assert!(!op1.base().is_pc_rel());
245                // Using rsp as index register is interpreted as just base w/o offset.
246                //   https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
247                // Disallow this case, as guard for the user.
248                assert!(!matches!(op1.index(), Reg64::rsp));
249                (0b00, 0b100)
250            }
251        };
252
253        let modrm = modrm(
254            mode,    /* mode */
255            opc_ext, /* reg */
256            rm,      /* rm */
257        );
258
259        let prefix = <Self as EncodeM<M>>::legacy_prefix();
260        let rex = <Self as EncodeM<M>>::rex(&op1);
261
262        self.emit_optional(&[prefix, rex]);
263        self.emit(&[opc, modrm]);
264        match op1.mode() {
265            AddrMode::Indirect => {}
266            AddrMode::IndirectDisp => self.emit(&op1.disp().to_ne_bytes()),
267            AddrMode::IndirectBaseIndex => {
268                self.emit(&[sib(0, op1.index().idx(), op1.base().idx())])
269            }
270        }
271        self.emit(op2.bytes());
272    }
273
274    /// Encode a memory-register instruction.
275    pub(crate) fn encode_mr<M: Mem, T: Reg>(&mut self, opc: u8, op1: M, op2: T)
276    where
277        Self: EncodeMR<M>,
278    {
279        // MR operand encoding.
280        //   op1 -> modrm.rm
281        //   op2 -> modrm.reg
282        let (mode, rm) = match op1.mode() {
283            AddrMode::Indirect => {
284                assert!(!op1.base().need_sib() && !op1.base().is_pc_rel());
285                (0b00, op1.base().idx())
286            }
287            AddrMode::IndirectDisp => {
288                assert!(!op1.base().need_sib());
289                (0b10, op1.base().idx())
290            }
291            AddrMode::IndirectBaseIndex => {
292                assert!(!op1.base().is_pc_rel());
293                // Using rsp as index register is interpreted as just base w/o offset.
294                //   https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
295                // Disallow this case, as guard for the user.
296                assert!(!matches!(op1.index(), Reg64::rsp));
297                (0b00, 0b100)
298            }
299        };
300
301        let modrm = modrm(
302            mode,      /* mode */
303            op2.idx(), /* reg */
304            rm,        /* rm */
305        );
306
307        let prefix = <Self as EncodeMR<M>>::legacy_prefix();
308        let rex = <Self as EncodeMR<M>>::rex(&op1, op2);
309
310        self.emit_optional(&[prefix, rex]);
311        self.emit(&[opc, modrm]);
312        match op1.mode() {
313            AddrMode::Indirect => {}
314            AddrMode::IndirectDisp => self.emit(&op1.disp().to_ne_bytes()),
315            AddrMode::IndirectBaseIndex => {
316                self.emit(&[sib(0, op1.index().idx(), op1.base().idx())])
317            }
318        }
319    }
320
321    /// Encode a register-memory instruction.
322    pub(crate) fn encode_rm<T: Reg, M: Mem>(&mut self, opc: u8, op1: T, op2: M)
323    where
324        Self: EncodeMR<M>,
325    {
326        // RM operand encoding.
327        //   op1 -> modrm.reg
328        //   op2 -> modrm.rm
329        self.encode_mr(opc, op2, op1);
330    }
331
332    /// Encode a jump to label instruction.
333    pub(crate) fn encode_jmp_label(&mut self, opc: &[u8], op1: &mut Label) {
334        // Emit the opcode.
335        self.emit(opc);
336
337        // Record relocation offset starting at the first byte of the disp32.
338        op1.record_offset(self.buf.len());
339
340        // Emit a zeroed disp32, which serves as placeholder for the relocation.
341        // We currently only support disp32 jump targets.
342        self.emit(&[0u8; 4]);
343
344        // Resolve any pending relocations for the label.
345        self.resolve(op1);
346    }
347}
348
349// -- Encoder helper.
350
351/// Encode helper for register-register instructions.
352pub(crate) trait EncodeRR<T: Reg> {
353    fn legacy_prefix() -> Option<u8> {
354        None
355    }
356
357    fn rex(op1: T, op2: T) -> Option<u8> {
358        if op1.need_rex() || op2.need_rex() {
359            Some(rex(op1.rexw(), op2.idx(), 0, op1.idx()))
360        } else {
361            None
362        }
363    }
364}
365
366impl EncodeRR<Reg8> for Asm {}
367impl EncodeRR<Reg32> for Asm {}
368impl EncodeRR<Reg16> for Asm {
369    fn legacy_prefix() -> Option<u8> {
370        Some(0x66)
371    }
372}
373impl EncodeRR<Reg64> for Asm {}
374
375/// Encode helper for register instructions.
376pub(crate) trait EncodeR<T: Reg> {
377    fn legacy_prefix() -> Option<u8> {
378        None
379    }
380
381    fn rex(op1: T) -> Option<u8> {
382        if op1.need_rex() {
383            Some(rex(op1.rexw(), 0, 0, op1.idx()))
384        } else {
385            None
386        }
387    }
388}
389
390impl EncodeR<Reg8> for Asm {}
391impl EncodeR<Reg32> for Asm {}
392impl EncodeR<Reg16> for Asm {
393    fn legacy_prefix() -> Option<u8> {
394        Some(0x66)
395    }
396}
397impl EncodeR<Reg64> for Asm {}
398
399/// Encode helper for memory-register instructions.
400pub(crate) trait EncodeMR<M: Mem> {
401    fn legacy_prefix() -> Option<u8> {
402        None
403    }
404
405    fn rex<T: Reg>(op1: &M, op2: T) -> Option<u8> {
406        if M::is_64() || op2.is_ext() || op1.base().is_ext() || op1.index().is_ext() {
407            Some(rex(
408                M::is_64(),
409                op2.idx(),
410                op1.index().idx(),
411                op1.base().idx(),
412            ))
413        } else {
414            None
415        }
416    }
417}
418
419impl EncodeMR<Mem8> for Asm {}
420impl EncodeMR<Mem16> for Asm {
421    fn legacy_prefix() -> Option<u8> {
422        Some(0x66)
423    }
424}
425impl EncodeMR<Mem32> for Asm {}
426impl EncodeMR<Mem64> for Asm {}
427
428/// Encode helper for memory perand instructions.
429pub(crate) trait EncodeM<M: Mem> {
430    fn legacy_prefix() -> Option<u8> {
431        None
432    }
433
434    fn rex(op1: &M) -> Option<u8> {
435        if M::is_64() || op1.base().is_ext() || op1.index().is_ext() {
436            Some(rex(M::is_64(), 0, op1.index().idx(), op1.base().idx()))
437        } else {
438            None
439        }
440    }
441}
442
443impl EncodeM<Mem8> for Asm {}
444impl EncodeM<Mem16> for Asm {
445    fn legacy_prefix() -> Option<u8> {
446        Some(0x66)
447    }
448}
449impl EncodeM<Mem32> for Asm {}
450impl EncodeM<Mem64> for Asm {}