/// This is the transcript from today's class. [sergey@thepond ~]$ cat hello.c #include <stdio.h> // How many times does this print "Hello"? // What happens if we change "int" to "unsigned int"? Why? int main() { int i = 10; while( i >= 0 ){ i = i - 1; puts( "Hello" ); } return 42; } // gcc -S stops compiling right after the compiler produced assembly, and produces a .s file: [sergey@thepond ~]$ gcc -S hello.c // Actually, let's get rid of .cfi_ lines that are used for handling C++ exceptions and // crowd out the output. I used to get rid of them with sed, // sed -i '/cfi_/d' hello.s // but asking Perplexity produced the right option for gcc to not generate these: [sergey@thepond ~]$ gcc -S -fno-asynchronous-unwind-tables hello.c [sergey@thepond ~]$ cat hello.s .file "hello.c" // lines that start with "." are metadata to be stored in a file .text .section .rodata .LC0: // the string "Hello\0" will be at address denoted as .LC0 .string "Hello" .text .globl main // These lines will end up in the "symbol table" of the executable .type main, @function // (including notation that "main" is a function) main: pushq %rbp // These two lines are the standard preamble that creates the function's movq %rsp, %rbp // "stack frame" aka "activation record". // RSP will move around, RBP will stay put throughout this function, // and will serve as an anchor for local variables. subq $16, %rsp // This reserves a frame of 16 bytes. Whatever instructions affect // the stack pointer RSP from that point on (POPs, PUSHes, CALLs) // should not touch the reserved area where your local variables like "i" // live. movl $10, -4(%rbp) // "i" is 4 bytes down from RBP. 10 is written into these 4 bytes // Remember that RBP value points to the prior value of RBP of the // previous frame saved by the first PUSHQ of the function. Thus // all frames are chained and can be walked to show the stack trace. // So the stack now is // |unused_12_bytes|i|prior_RBP|address_to_return_from_main|... 
// ^ ^ ^ // | | | // RSP-+ RBP-4---+ +---RBP // <<--towards 0 towards FF..FF -->> jmp .L2 .L3: subl $1, -4(%rbp) // x86 is CISC, many operations rolled into one, "fetch the content of 4 // bytes at address RBP-4, decrement by 1, store it back into memory" leaq .LC0(%rip), %rax // This one is tricky. It will put the address of the string "Hello" into RAX // by computing it as a relative offset to the address of the next // instruction (which is what RIP holds while this one executes). movq %rax, %rdi // By the x86_64 calling convention, any function looks for its first argument // in the RDI register, the second in RSI, then RDX, RCX, R8, R9. Just // ask Perplexity about "amd64 calling convention" :) // You may wonder: why not send the address of "Hello" straight into RDI? The answer is, // compilers like set-piece patterns, but if you ask them to optimize, this and more will happen! call puts@PLT // This will push the address of the next instruction (aka .L2) on the stack, // and will jump to wherever the dynamic linker loaded the "puts" C library // function. PLT is "Procedure Linkage Table", used by the dynamic linker. // We'll look into this in more detail. .L2: cmpl $0, -4(%rbp) // Compare is subtract without saving the result, but with saving the flags. // This subtracts 0 from "i". jns .L3 // If the result is _not_ negative, i.e., Sign flag is not set, jump back // into the loop. // Ask Perplexity about "jns amd64 instruction". movl $42, %eax // According to the x86_64 calling convention, functions put their return // values into RAX leave // This is the equivalent of "mov %rbp, %rsp" followed by "pop %rbp". The function's frame // is now officially not needed anymore, RSP and RBP get restored to what they were // before we entered the function and created the frame. ret // And now pop the return address from the stack and put it into RIP // We are coming back to just after the CALL instruction that landed us // at the start of this function. 
.size main, .-main .ident "GCC: (GNU) 15.2.1 20250813" .section .note.GNU-stack,"",@progbits // Let us now see how this actually runs! The GDB debugger will help. [sergey@thepond ~]$ gcc -o hello1 -fno-asynchronous-unwind-tables hello.c [sergey@thepond ~]$ gdb ./hello1 GNU gdb (GDB) 16.3 Copyright (C) 2024 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "x86_64-pc-linux-gnu". Type "show configuration" for configuration details. For bug reporting instructions, please see: . Find the GDB manual and other documentation resources online at: . For help, type "help". Type "apropos word" to search for commands related to "word"... Reading symbols from ./hello1... This GDB supports auto-downloading debuginfo from the following URLs: Enable debuginfod for this session? (y or [n]) Debuginfod has been disabled. To make this setting permanent, add 'set debuginfod enabled off' to .gdbinit. (No debugging symbols found in ./hello1) // At this point the file is loaded by GDB but not yet loaded into memory to actually run. // So the "addresses" we see are actually offsets into the file, not real memory addresses. (gdb) disas main Dump of assembler code for function main: 0x0000000000001139 <+0>: push %rbp 0x000000000000113a <+1>: mov %rsp,%rbp 0x000000000000113d <+4>: sub $0x10,%rsp 0x0000000000001141 <+8>: movl $0xa,-0x4(%rbp) 0x0000000000001148 <+15>: jmp 0x115d 0x000000000000114a <+17>: subl $0x1,-0x4(%rbp) 0x000000000000114e <+21>: lea 0xeaf(%rip),%rax # 0x2004 0x0000000000001155 <+28>: mov %rax,%rdi 0x0000000000001158 <+31>: call 0x1030 0x000000000000115d <+36>: cmpl $0x0,-0x4(%rbp) 0x0000000000001161 <+40>: jns 0x114a 0x0000000000001163 <+42>: mov $0x2a,%eax 0x0000000000001168 <+47>: leave 0x0000000000001169 <+48>: ret End of assembler dump. 
// True to the Unix ways, GDB includes its own documentation: (gdb) help disas Disassemble a specified section of memory. Usage: disassemble[/m|/r|/s] START [, END] Default is the function surrounding the pc of the selected frame. With a /s modifier, source lines are included (if available). In this mode, the output is displayed in PC address order, and file names and contents for all relevant source files are displayed. With a /m modifier, source lines are included (if available). This view is "source centric": the output is in source line order, regardless of any optimization that is present. Only the main source file is displayed, not those of, e.g., any inlined functions. This modifier hasn't proved useful in practice and is deprecated in favor of /s. With a /r modifier, raw instructions in hex are included. // <<-- we will use this With a single argument, the function surrounding that address is dumped. Two arguments (separated by a comma) are taken as a range of memory to dump, in the form of "start,end", or "start,+length". Note that the address is interpreted as an expression, not as a location like in the "break" command. So, for example, if you want to disassemble function bar in file foo.c you must type "disassemble 'foo.c'::bar" and not "disassemble foo.c:bar". (gdb) disas main Dump of assembler code for function main: 0x0000000000001139 <+0>: push %rbp 0x000000000000113a <+1>: mov %rsp,%rbp 0x000000000000113d <+4>: sub $0x10,%rsp 0x0000000000001141 <+8>: movl $0xa,-0x4(%rbp) 0x0000000000001148 <+15>: jmp 0x115d 0x000000000000114a <+17>: subl $0x1,-0x4(%rbp) 0x000000000000114e <+21>: lea 0xeaf(%rip),%rax # 0x2004 0x0000000000001155 <+28>: mov %rax,%rdi 0x0000000000001158 <+31>: call 0x1030 0x000000000000115d <+36>: cmpl $0x0,-0x4(%rbp) 0x0000000000001161 <+40>: jns 0x114a 0x0000000000001163 <+42>: mov $0x2a,%eax 0x0000000000001168 <+47>: leave 0x0000000000001169 <+48>: ret End of assembler dump. 
// Compare: we now see raw bytes of CPU instructions as compiled and linked (but not yet loaded): (gdb) disas /r main Dump of assembler code for function main: 0x0000000000001139 <+0>: 55 push %rbp 0x000000000000113a <+1>: 48 89 e5 mov %rsp,%rbp 0x000000000000113d <+4>: 48 83 ec 10 sub $0x10,%rsp 0x0000000000001141 <+8>: c7 45 fc 0a 00 00 00 movl $0xa,-0x4(%rbp) 0x0000000000001148 <+15>: eb 13 jmp 0x115d 0x000000000000114a <+17>: 83 6d fc 01 subl $0x1,-0x4(%rbp) 0x000000000000114e <+21>: 48 8d 05 af 0e 00 00 lea 0xeaf(%rip),%rax # 0x2004 0x0000000000001155 <+28>: 48 89 c7 mov %rax,%rdi 0x0000000000001158 <+31>: e8 d3 fe ff ff call 0x1030 0x000000000000115d <+36>: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 0x0000000000001161 <+40>: 79 e7 jns 0x114a 0x0000000000001163 <+42>: b8 2a 00 00 00 mov $0x2a,%eax 0x0000000000001168 <+47>: c9 leave 0x0000000000001169 <+48>: c3 ret End of assembler dump. // It will help to stop at main as we single-step through the instructions. (gdb) b main Breakpoint 1 at 0x113d // Now RUN! (gdb) r Starting program: /home/sergey/hello1 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". 
Breakpoint 1, 0x000055555555513d in main () (gdb) disas /r main Dump of assembler code for function main: 0x0000555555555139 <+0>: 55 push %rbp 0x000055555555513a <+1>: 48 89 e5 mov %rsp,%rbp => 0x000055555555513d <+4>: 48 83 ec 10 sub $0x10,%rsp // <<-- next instruction to execute 0x0000555555555141 <+8>: c7 45 fc 0a 00 00 00 movl $0xa,-0x4(%rbp) 0x0000555555555148 <+15>: eb 13 jmp 0x55555555515d 0x000055555555514a <+17>: 83 6d fc 01 subl $0x1,-0x4(%rbp) 0x000055555555514e <+21>: 48 8d 05 af 0e 00 00 lea 0xeaf(%rip),%rax # 0x555555556004 0x0000555555555155 <+28>: 48 89 c7 mov %rax,%rdi 0x0000555555555158 <+31>: e8 d3 fe ff ff call 0x555555555030 0x000055555555515d <+36>: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 0x0000555555555161 <+40>: 79 e7 jns 0x55555555514a 0x0000555555555163 <+42>: b8 2a 00 00 00 mov $0x2a,%eax 0x0000555555555168 <+47>: c9 leave 0x0000555555555169 <+48>: c3 ret End of assembler dump. // Show all registers (aka "info registers") (gdb) i r rax 0x7ffff7e10e28 140737352109608 rbx 0x0 0 rcx 0x555555557dd8 93824992247256 rdx 0x7fffffffebf8 140737488350200 rsi 0x7fffffffebe8 140737488350184 rdi 0x1 1 rbp 0x7fffffffeac0 0x7fffffffeac0 rsp 0x7fffffffeac0 0x7fffffffeac0 r8 0x7ffff7e09680 140737352078976 r9 0x7ffff7e0afe0 140737352085472 r10 0x7fffffffe800 140737488349184 r11 0x203 515 r12 0x7fffffffebe8 140737488350184 r13 0x1 1 r14 0x7ffff7ffd000 140737354125312 r15 0x555555557dd8 93824992247256 rip 0x55555555513d 0x55555555513d eflags 0x246 [ PF ZF IF ] cs 0x33 51 ss 0x2b 43 ds 0x0 0 es 0x0 0 fs 0x0 0 gs 0x0 0 k0 0x80000 524288 k1 0x44001021 1140854817 k2 0x0 0 --Type for more, q to quit, c to continue without paging--q Quit // RSP's value has just been copied into RBP. 
RSP will change, RBP will anchor the local variables and the frame (gdb) i r rsp rbp rsp 0x7fffffffeac0 0x7fffffffeac0 rbp 0x7fffffffeac0 0x7fffffffeac0 // Note that now you will see real addresses where the code has been loaded: (gdb) disas /r main Dump of assembler code for function main: 0x0000555555555139 <+0>: 55 push %rbp 0x000055555555513a <+1>: 48 89 e5 mov %rsp,%rbp => 0x000055555555513d <+4>: 48 83 ec 10 sub $0x10,%rsp // <<-- about to execute this instruction 0x0000555555555141 <+8>: c7 45 fc 0a 00 00 00 movl $0xa,-0x4(%rbp) 0x0000555555555148 <+15>: eb 13 jmp 0x55555555515d 0x000055555555514a <+17>: 83 6d fc 01 subl $0x1,-0x4(%rbp) 0x000055555555514e <+21>: 48 8d 05 af 0e 00 00 lea 0xeaf(%rip),%rax # 0x555555556004 0x0000555555555155 <+28>: 48 89 c7 mov %rax,%rdi 0x0000555555555158 <+31>: e8 d3 fe ff ff call 0x555555555030 0x000055555555515d <+36>: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 0x0000555555555161 <+40>: 79 e7 jns 0x55555555514a 0x0000555555555163 <+42>: b8 2a 00 00 00 mov $0x2a,%eax 0x0000555555555168 <+47>: c9 leave 0x0000555555555169 <+48>: c3 ret End of assembler dump. // Step exactly one instruction forward. 
Aka "si" (gdb) stepi 0x0000555555555141 in main () // Observe RSP getting 16 (hex 0x10) subtracted from it to create the frame: (gdb) i r rsp rbp rsp 0x7fffffffeab0 0x7fffffffeab0 rbp 0x7fffffffeac0 0x7fffffffeac0 // Let's examine ("x" command) memory at the address of the string through several views: // As 10 characters: (gdb) x/10c 0x555555556004 0x555555556004: 72 'H' 101 'e' 108 'l' 108 'l' 111 'o' 0 '\000' 0 '\000' 0 '\000' 0x55555555600c: 1 '\001' 27 '\033' // As 10 bytes in hexadecimal: (gdb) x/10x 0x555555556004 0x555555556004: 0x48 0x65 0x6c 0x6c 0x6f 0x00 0x00 0x00 0x55555555600c: 0x01 0x1b // As a null-terminated string: (gdb) x/s 0x555555556004 0x555555556004: "Hello" // Now we are about to write 10 into our local variable slot we made for "i" in the frame: (gdb) disas main Dump of assembler code for function main: 0x0000555555555139 <+0>: push %rbp 0x000055555555513a <+1>: mov %rsp,%rbp 0x000055555555513d <+4>: sub $0x10,%rsp => 0x0000555555555141 <+8>: movl $0xa,-0x4(%rbp) 0x0000555555555148 <+15>: jmp 0x55555555515d 0x000055555555514a <+17>: subl $0x1,-0x4(%rbp) 0x000055555555514e <+21>: lea 0xeaf(%rip),%rax # 0x555555556004 0x0000555555555155 <+28>: mov %rax,%rdi 0x0000555555555158 <+31>: call 0x555555555030 0x000055555555515d <+36>: cmpl $0x0,-0x4(%rbp) 0x0000555555555161 <+40>: jns 0x55555555514a 0x0000555555555163 <+42>: mov $0x2a,%eax 0x0000555555555168 <+47>: leave 0x0000555555555169 <+48>: ret End of assembler dump. // It's hard to remember the x/.. syntax for interpreting and displaying memory. Help to the rescue: (gdb) help x Examine memory: x/FMT ADDRESS. ADDRESS is an expression for the memory address to examine. FMT is a repeat count followed by a format letter and a size letter. Format letters are o(octal), x(hex), d(decimal), u(unsigned decimal), t(binary), f(float), a(address), i(instruction), c(char), s(string) and z(hex, zero padded on the left). Size letters are b(byte), h(halfword), w(word), g(giant, 8 bytes). 
The specified number of objects of the specified size are printed according to the format. If a negative number is specified, memory is examined backward from the address. Defaults for format and size letters are those previously used. Default count is 1. Default address is following last thing printed with this command or "print". (gdb) disas main Dump of assembler code for function main: 0x0000555555555139 <+0>: push %rbp 0x000055555555513a <+1>: mov %rsp,%rbp 0x000055555555513d <+4>: sub $0x10,%rsp => 0x0000555555555141 <+8>: movl $0xa,-0x4(%rbp) 0x0000555555555148 <+15>: jmp 0x55555555515d 0x000055555555514a <+17>: subl $0x1,-0x4(%rbp) 0x000055555555514e <+21>: lea 0xeaf(%rip),%rax # 0x555555556004 0x0000555555555155 <+28>: mov %rax,%rdi 0x0000555555555158 <+31>: call 0x555555555030 0x000055555555515d <+36>: cmpl $0x0,-0x4(%rbp) 0x0000555555555161 <+40>: jns 0x55555555514a 0x0000555555555163 <+42>: mov $0x2a,%eax 0x0000555555555168 <+47>: leave 0x0000555555555169 <+48>: ret End of assembler dump. // Let's see what's on the stack. I am rusty on these, fumbling a bit: (gdb) x/1w $rbp 0x7fffffffeac0: U"\xffffeb60\377\177\000\000\xf7c27675\377\177\000\000\xf7fc2000\377\177\000\000\xffffebe8\377\177\000\000\xffffeb20\001\x55555139\125\125\000\000" // No, I want hex! (gdb) x/1xw $rbp 0x7fffffffeac0: 0xffffeb60 // No, I want the full 8 bytes. (gdb) x/1xg $rbp 0x7fffffffeac0: 0x00007fffffffeb60 // <<- OK, this is the previous RBP, saved here by "PUSHQ %RBP" // What's in the 4 bytes notionally allocated for "i"? 
(gdb) x/1xw $rbp-4 0x7fffffffeabc: 0x00007fff // <<-- Something left over from previous uses of that memory // But now let's store "10" there, by executing the instruction: (gdb) si 0x0000555555555148 in main () // And now it has 10, in hex (0xa): (gdb) x/1xw $rbp-4 0x7fffffffeabc: 0x0000000a // Let us now visit this place in code every iteration of the loop: (gdb) b *0x000055555555515d Breakpoint 2 at 0x55555555515d // Run till we next hit it: (gdb) cont Continuing. Breakpoint 2, 0x000055555555515d in main () (gdb) disas main Dump of assembler code for function main: 0x0000555555555139 <+0>: push %rbp 0x000055555555513a <+1>: mov %rsp,%rbp 0x000055555555513d <+4>: sub $0x10,%rsp 0x0000555555555141 <+8>: movl $0xa,-0x4(%rbp) 0x0000555555555148 <+15>: jmp 0x55555555515d 0x000055555555514a <+17>: subl $0x1,-0x4(%rbp) 0x000055555555514e <+21>: lea 0xeaf(%rip),%rax # 0x555555556004 0x0000555555555155 <+28>: mov %rax,%rdi 0x0000555555555158 <+31>: call 0x555555555030 => 0x000055555555515d <+36>: cmpl $0x0,-0x4(%rbp) 0x0000555555555161 <+40>: jns 0x55555555514a 0x0000555555555163 <+42>: mov $0x2a,%eax 0x0000555555555168 <+47>: leave 0x0000555555555169 <+48>: ret End of assembler dump. // We came here via the "jmp" at 0x0000555555555148. So we jumped over subl this time: (gdb) x/1xw $rbp-4 0x7fffffffeabc: 0x0000000a (gdb) c Continuing. Hello // And now we circle around and the "i" is 9: Breakpoint 2, 0x000055555555515d in main () (gdb) x/1xw $rbp-4 0x7fffffffeabc: 0x00000009 (gdb) c Continuing. Hello Breakpoint 2, 0x000055555555515d in main () // And now 8: (gdb) x/1xw $rbp-4 0x7fffffffeabc: 0x00000008 // Let's see the stack differently: (gdb) x/4xg $rbp 0x7fffffffeac0: 0x00007fffffffeb60 0x00007ffff7c27675 0x7fffffffead0: 0x00007ffff7fc2000 0x00007fffffffebe8 // Wait, these are not our locals! Starting at RBP we see the saved RBP, the return address, and then the caller's frame. 
Our frame can be seen by starting // the display at RBP-16: (gdb) x/4xg $rbp-16 0x7fffffffeab0: 0x0000000000000000 0x00000008f7fe1e90 ^^^^^^^^^^ <<-- this is our "i", 4 bytes at RBP-4 0x7fffffffeac0: 0x00007fffffffeb60 0x00007ffff7c27675 // Slice it by 4-byte chunks: (gdb) x/4xw $rbp-16 0x7fffffffeab0: 0x00000000 0x00000000 0xf7fe1e90 0x00000008 // Slice it by bytes in hex: (gdb) x/16xb $rbp-16 0x7fffffffeab0: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x7fffffffeab8: 0x90 0x1e 0xfe 0xf7 0x08 0x00 0x00 0x00 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Our "i" is little-endian: in a multi-byte value, the least significant byte comes first // We will look more at GDB's way to single-step through instructions and examine memory // next class. // Observe now how the compiler will change the code when told to optimize at level 2: [sergey@thepond ~]$ gcc -fno-asynchronous-unwind-tables -O2 -S hello.c -o hello-O2.s [sergey@thepond ~]$ cat hello-O2.s .file "hello.c" .text .section .rodata.str1.1,"aMS",@progbits,1 .LC0: .string "Hello" .section .text.startup,"ax",@progbits .p2align 4 .globl main .type main, @function main: // We are not reserving a stack frame! No RSP adjustment or RBP saving. pushq %rbx // Instead, we are saving RBX, because we are going to keep "i" in EBX (the low 32 bits of RBX). // By calling convention, we must preserve and restore its prior contents. movl $11, %ebx // "i" will live in a register instead! // Also notice that GCC figured out we wanted 11 repetitions, and did so. .p2align 4 .p2align 3 .L2: leaq .LC0(%rip), %rdi // The string "Hello"'s address is placed in RDI, as per calling convention call puts@PLT subl $1, %ebx // Wouldn't calling puts() clobber EBX? No, because we depend on puts() // to save it on entry and restore it for us, just like we do. jne .L2 // Jump if the Zero flag is not set (as a result of prior operation, subl) movl $42, %eax popq %rbx // And now we restore RBX from the stack ret .size main, .-main .ident "GCC: (GNU) 15.2.1 20250813" .section .note.GNU-stack,"",@progbits