// Recursive definitions are very graceful, and will be extremely useful for data types // such as lists and trees, as we'll see next week in LISP and later even more so // in strongly typed functional languages such as ML, Haskell, and Rust. // The stack frame is a miracle of separating memory footprints of individual calls // to the same function, which run exactly the same code. In the fact() example, // the magic is in the combination of the relative addressing of all locals off RBP, // the preamble that saves the prior RBP ("push %rbp; mov %rsp, %rbp") and the post-amble // that restores is ("leave" is roughly "mov %rbp, %rsp; pop %rbp", preamble in reverse. // // Note that "CALL fact" acts kind of like a loop: execution jumps back to the start of the // function. The additional stack operations such as CALL pushing the address of the next // instruction to come back to after CALL and RET popping that address off the stack into RIP // keep the data context and the code flow in sync. // Note that the stack to store return addresses can be _separate_ from the stack where // local data it stored. Indeed, MacOS/Darwin kernel uses separate stacks for data and // return addresses! So CALL and RET use a separate stack than PUSH and POP/LEAVE. // Also note that the naive implementation of recursive functions can be very wasteful. // Consider this recursive function of Fibonacci numbers: [sergey@thepond ~]$ cat fib.c #include /* a very naive recursive Fibonacci number implementation */ unsigned int fib(unsigned int n) { if( 0 == n ) return 1; if( 1 == n ) return 1; return fib(n-1) + fib(n-2); } int main() { printf("%d\n", fib(5)); return 0; } // I am tired of back-searching for the magic -fno-asynchronous-unwind-tables option, // so let's make a shell alias: [sergey@thepond ~]$ alias gcc-S="gcc -Wall -S -fno-asynchronous-unwind-tables" [sergey@thepond ~]$ gcc-S fib.c [sergey@thepond ~]$ cat fib.s .file "fib.c" .text .globl fib .type fib, @function fib: pushq %rbp movq %rsp, %rbp pushq %rbx // we are going to use RBX as temporary storage, so save it subq $24, %rsp movl %edi, -20(%rbp) // save the argument n cmpl $0, -20(%rbp) // is it 0? -- base case jne .L2 // if not, jump to the next check movl $1, %eax // fall through here if yes, prepare to return 1 jmp .L3 // jump to post-amble and RET .L2: cmpl $1, -20(%rbp) // is it 1? -- base case jne .L4 // if not, jump to the next thing movl $1, %eax // fall through here if yes, prepare to return 1 jmp .L3 // jump to post-amble and RET .L4: movl -20(%rbp), %eax // inductive step, get the argument subl $1, %eax movl %eax, %edi call fib movl %eax, %ebx // saving the value returned by fib(n-1) in EBX movl -20(%rbp), %eax . subl $2, %eax . movl %eax, %edi . call fib . addl %ebx, %eax // add EBX and the return value of f(n-2) just returned in EAX .L3: movq -8(%rbp), %rbx // restore RBX to what it was when we entered the function leave ret // Note that without saving EBX in the preamble and restoring it in the post-amble // the above code would not work. What would happen? ... skipped the rest .... [sergey@thepond ~]$ gcc -o fib fib.s [sergey@thepond ~]$ ./fib 8 // fib(5) // But, how many calls and frames? Let's see with GDB [sergey@thepond ~]$ gdb ./fib GNU gdb (GDB) 16.3 (gdb) disas fib Dump of assembler code for function fib: 0x0000000000001139 <+0>: push %rbp 0x000000000000113a <+1>: mov %rsp,%rbp 0x000000000000113d <+4>: push %rbx 0x000000000000113e <+5>: sub $0x18,%rsp 0x0000000000001142 <+9>: mov %edi,-0x14(%rbp) 0x0000000000001145 <+12>: cmpl $0x0,-0x14(%rbp) 0x0000000000001149 <+16>: jne 0x1152 0x000000000000114b <+18>: mov $0x1,%eax 0x0000000000001150 <+23>: jmp 0x117d 0x0000000000001152 <+25>: cmpl $0x1,-0x14(%rbp) 0x0000000000001156 <+29>: jne 0x115f 0x0000000000001158 <+31>: mov $0x1,%eax 0x000000000000115d <+36>: jmp 0x117d 0x000000000000115f <+38>: mov -0x14(%rbp),%eax 0x0000000000001162 <+41>: sub $0x1,%eax 0x0000000000001165 <+44>: mov %eax,%edi 0x0000000000001167 <+46>: call 0x1139 0x000000000000116c <+51>: mov %eax,%ebx 0x000000000000116e <+53>: mov -0x14(%rbp),%eax 0x0000000000001171 <+56>: sub $0x2,%eax 0x0000000000001174 <+59>: mov %eax,%edi 0x0000000000001176 <+61>: call 0x1139 0x000000000000117b <+66>: add %ebx,%eax 0x000000000000117d <+68>: mov -0x8(%rbp),%rbx 0x0000000000001181 <+72>: leave 0x0000000000001182 <+73>: ret End of assembler dump. (gdb) b fib Breakpoint 1 at 0x113d // Let's just print a line for each call to fib, with the argument given (gdb) command 1 Type commands for breakpoint(s) 1, one per line. End with a line saying just "end". >print $edi // <<-- or printf "fib(%d)\n", $edi if we want fancy >continue >end (gdb) r Starting program: /home/sergey/fib [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555513d in fib () $1 = 5 Breakpoint 1, 0x000055555555513d in fib () $2 = 4 Breakpoint 1, 0x000055555555513d in fib () $3 = 3 Breakpoint 1, 0x000055555555513d in fib () $4 = 2 Breakpoint 1, 0x000055555555513d in fib () $5 = 1 Breakpoint 1, 0x000055555555513d in fib () $6 = 0 Breakpoint 1, 0x000055555555513d in fib () $7 = 1 Breakpoint 1, 0x000055555555513d in fib () $8 = 2 Breakpoint 1, 0x000055555555513d in fib () $9 = 1 Breakpoint 1, 0x000055555555513d in fib () $10 = 0 Breakpoint 1, 0x000055555555513d in fib () $11 = 3 Breakpoint 1, 0x000055555555513d in fib () $12 = 2 Breakpoint 1, 0x000055555555513d in fib () $13 = 1 Breakpoint 1, 0x000055555555513d in fib () $14 = 0 Breakpoint 1, 0x000055555555513d in fib () $15 = 1 // <<--- 15 temporary values (you can use them at the GDB shell) 8 [Inferior 1 (process 500702) exited normally] (gdb) info br Num Type Disp Enb Address What 1 breakpoint keep y 0x000055555555513d breakpoint already hit 15 times // <<--- Hit 15 times, so 15 calls. print( "fib %d\n", $edi) continue (gdb) quit // In a less naive implementation, values returned by previous invocations of fib(k) // will be memorized in a table and looked up rather than continually recalculated. // This is known as memoization or dynamic programming. ===================================================================================== // And now for an even darker side of stack frames! // Consider the following program: [sergey@thepond ~]$ cat gets2.c #include char *gets(char *s); // Needed because gets() is so bad, it's not included in Alpine's // standard libc headers. Only fgets() is present, check with gcc -E int main() { struct inps { char c[10]; // array of 10 chars int cnt; // 4 byte integer } inp; // how long is this struct? _At least_ 14 bytes: struct members aren't // guaranteed to be adjacent, unless __attribute__((packed)) is added. int i; inp.cnt = 10; gets( inp.c); i = inp.cnt; while( i >= 0 ){ i = i - 1; puts(inp.c); } return 42; } // If you try to compile it without the explicit declaration of gets(), you'll get an error: // "gets? What gets? You mean fgets, right?" [sergey@thepond ~]$ gcc -Wall -o gets2 gets2.c gets2.c: In function 'main': gets2.c:14:3: error: implicit declaration of function 'gets'; did you mean 'fgets'? [-Wimplicit-function-declaration] 14 | gets( inp.c); | ^~~~ | fgets [sergey@thepond ~]$ gcc -E gets2.c | grep gets | grep -v ^# extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream) ... and my own uses of it ... // Also check out the manual page, "man gets". It tells you to never use this function! Why? // The linker will warn you in its turn, too: [sergey@thepond ~]$ gcc -Wall -o gets2 gets2.c /usr/bin/ld: /tmp/ccqnF9S4.o: in function `main': gets2.c:(.text+0x26): warning: the `gets' function is dangerous and should not be used. // OK, we finally managed to compile it. Let's run it: // gets reads characters from Unix's standard input and copies them into memory, // starting at the address provided, until it encounters the character 0x00 or // the end of the input stream. Gets() writes 0x00 at the end of the copied string. [sergey@thepond ~]$ ./gets2 hello // <<-- we type this hello // it gets echoed a bunch of times hello hello hello hello hello hello hello hello hello hello // Let's use echo and the Unix pipe "|" to control the input: [sergey@thepond ~]$ echo "hello" | ./gets2 hello hello hello hello hello hello hello hello hello hello hello // We can pipe the output of the program to wc to count lines: [sergey@thepond ~]$ echo "hello" | ./gets2 | wc 11 11 66 // So I prepared some inputs: [sergey@thepond ~]$ cat A1 A10 A100 A11 A12 A13 A14 A15 A16 A17 [sergey@thepond ~]$ cat A11 AAAAAAAAAAA [sergey@thepond ~]$ wc A11 0 1 11 A11 // Note that there is no final newline here, just pure 'A's // (xxd shows the exact bytes in a file) [sergey@thepond ~]$ xxd A10 00000000: 4141 4141 4141 4141 4141 AAAAAAAAAA // Let's feed ten 'A's into the program. That will result in gets() writing 11 chars, // counting the final 0x00 (a.k.a. '\0') [sergey@thepond ~]$ cat A10 | ./gets2 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA // Seems to be fine... [sergey@thepond ~]$ cat A11 | ./gets2 | wc 11 11 132 // Let's write 11 'A's. Gets() will write 11 'A's and a '\0' [sergey@thepond ~]$ cat A11 | ./gets2 AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA // Seems fine again... But what is this? [sergey@thepond ~]$ cat A12 | ./gets2 AAAAAAAAAAAA // Just one line instead of 11! Mystery. Shall we continue? [sergey@thepond ~]$ cat A13 | ./gets2 AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA /// keep scrolling :) AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA AAAAAAAAAAAAA // That's 66 lines, reproducibly. [sergey@thepond ~]$ cat A13 | ./gets2 | wc 66 66 924 // What gives? Let's see more: [sergey@thepond ~]$ cat A14 | ./gets2 | wc 16706 16706 250590 [sergey@thepond ~]$ cat A15 | ./gets2 | wc 4276546 4276546 68424736 // What if we take 100 'A's? [sergey@thepond ~]$ wc A100 0 1 100 A100 [sergey@thepond ~]$ cat A100 | ./gets2 > /dev/null *** stack smashing detected ***: terminated Aborted (core dumped) // At some point, the mystery gets to be too much for the standard library :) // Let's see what's going on. [sergey@thepond ~]$ gdb ./gets2 GNU gdb (GDB) 16.3 (No debugging symbols found in ./gets2) (gdb) b main Breakpoint 1 at 0x115d (gdb) r Starting program: /home/sergey/gets2 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555515d in main () (gdb) disas Dump of assembler code for function main: 0x0000555555555159 <+0>: push %rbp 0x000055555555515a <+1>: mov %rsp,%rbp => 0x000055555555515d <+4>: sub $0x30,%rsp // You can skip the following on the first reading 0x0000555555555161 <+8>: mov %fs:0x28,%rax // this gets a value from "thread-local storage", // an area that a Unix thread doesn't share with others, unlike the rest of address space. // This uses an x86-specific gimmick of segment selectors that are a part of the thread context, // context-switched together with registers. If this doesn't sound familiar, just think of this as // a per-thread secret value squirreled away somewhere safe away from the stack 0x000055555555516a <+17>: mov %rax,-0x8(%rbp) // ...and this value is saved at RBP-8. // it will serve as a "canary" for the stack. // Read on :) 0x000055555555516e <+21>: xor %eax,%eax 0x0000555555555170 <+23>: movl $0xa,-0x14(%rbp) // inp.cnt = 10 0x0000555555555177 <+30>: lea -0x20(%rbp),%rax // the address of inp.c 0x000055555555517b <+34>: mov %rax,%rdi // .. will be the argument to gets() 0x000055555555517e <+37>: call 0x555555555050 0x0000555555555183 <+42>: mov -0x14(%rbp),%eax // now we read inp.cnt back 0x0000555555555186 <+45>: mov %eax,-0x24(%rbp) // ... into "i" 0x0000555555555189 <+48>: jmp 0x55555555519b // typical trick for loops // that run at least once 0x000055555555518b <+50>: subl $0x1,-0x24(%rbp) // i = i-1 0x000055555555518f <+54>: lea -0x20(%rbp),%rax // address of inp.c 0x0000555555555193 <+58>: mov %rax,%rdi // .. will be passed to puts() 0x0000555555555196 <+61>: call 0x555555555030 0x000055555555519b <+66>: cmpl $0x0,-0x24(%rbp) // compare "i" with 0 0x000055555555519f <+70>: jns 0x55555555518b // if not negative, jump back 0x00005555555551a1 <+72>: mov $0x2a,%eax 0x00005555555551a6 <+77>: mov -0x8(%rbp),%rdx // load the canary from the stack 0x00005555555551aa <+81>: sub %fs:0x28,%rdx // is it the same as the secret value? 0x00005555555551b3 <+90>: je 0x5555555551ba // if so, return 0x00005555555551b5 <+92>: call 0x555555555040 <__stack_chk_fail@plt> // if not, signal trouble 0x00005555555551ba <+97>: leave 0x00005555555551bb <+98>: ret End of assembler dump. // GDB allows us to run the program with the prepared input from a file (gdb) r < A12 The program being debugged has been started already. Start it from the beginning? (y or n) y Starting program: /home/sergey/gets2 < A12 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555515d in main () (gdb) c Continuing. AAAAAAAAAAAA [Inferior 1 (process 501913) exited with code 052] // What we want is to watch when inp.cnt gets written. A hardware _watchpoint_, i.e., // a breakpoint that triggers when an address is written (or read, or both), is what we need. // I fumbled a lot here, see a separate "outtakes" log for the explanations of my errors. // But the answer was really simple: compute and use an absolute address (prefixed with a *, // otherwise it will be confused for a function name), _after_ entering into main(): (gdb) i r rbp rbp 0x7fffffffeac0 0x7fffffffeac0 (gdb) watch *(0x7fffffffeac0-0x14) Hardware watchpoint 2: *(0x7fffffffeac0-0x14) (gdb) c Continuing. Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 0 New value = 10 // inp.cnt = 10 gets written 0x0000555555555177 in main () (gdb) b *0x0000555555555183 // just after gets() call Breakpoint 3 at 0x555555555183 (gdb) c Continuing. Breakpoint 3, 0x0000555555555183 in main () (gdb) x/10xg $rsp 0x7fffffffea90: 0x0000000000000000 0x0000000000000000 0x7fffffffeaa0: 0x4141414141414141 0x0000000a00004141 // <<-- inp struct 0x7fffffffeab0: 0x0000000000000000 0x492cc7f105989500 0x7fffffffeac0: 0x00007fffffffeb60 0x00007ffff7c27675 0x7fffffffead0: 0x00007ffff7fc2000 0x00007fffffffebe8 // More granularly: (gdb) x/16xb 0x7fffffffeaa0 0x7fffffffeaa0: 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x41 <<-- 10-byte buffer inp.c 0x7fffffffeaa8: 0x41 0x41 0x00 0x00 0x0a 0x00 0x00 0x00 --------->>| ^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 slack bytes int inp.cnt // Note that the actual length of struct inp as allocated is 16 bytes, not 10. There are 2 unused // "slack" bytes between the 10 byte buffer "c" and the integer "cnt", likely for nicer alignment. (gdb) c Continuing. AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA Hardware watchpoint 2: *(0x7fffffffeac0-0x14) // this is a spurious trigger, our program is already // finished. Some libc runtime function is using // the stack. Old value = 10 New value = 21845 0x00007ffff7c40aca in ?? () from /usr/lib/libc.so.6 (gdb) info b Num Type Disp Enb Address What 1 breakpoint keep y 0x000055555555515d breakpoint already hit 1 time 2 hw watchpoint keep y *(0x7fffffffeac0-0x14) breakpoint already hit 2 times 3 breakpoint keep y 0x0000555555555183 breakpoint already hit 1 time // Another run, with 'A'x11 times // We don't want to see any triggering of our watchpoint before we enter main(): (gdb) disable 2 (gdb) commands 1 Type commands for breakpoint(s) 1, one per line. End with a line saying just "end". >enable 2 >continue >end // .. or after gets() is done: (gdb) commands 3 Type commands for breakpoint(s) 1, one per line. End with a line saying just "end". >disable 2 >continue >end (gdb) r < A11 The program being debugged has been started already. Start it from the beginning? (y or n) y Starting program: /home/sergey/gets2 < A11 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555515d in main () Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 0 New value = 10 /// <<-- normal writing of inp.cnt = 10 0x0000555555555177 in main () (gdb) c Continuing. Breakpoint 3, 0x0000555555555183 in main () (gdb) disas Dump of assembler code for function main: 0x0000555555555159 <+0>: push %rbp 0x000055555555515a <+1>: mov %rsp,%rbp 0x000055555555515d <+4>: sub $0x30,%rsp 0x0000555555555161 <+8>: mov %fs:0x28,%rax 0x000055555555516a <+17>: mov %rax,-0x8(%rbp) 0x000055555555516e <+21>: xor %eax,%eax 0x0000555555555170 <+23>: movl $0xa,-0x14(%rbp) 0x0000555555555177 <+30>: lea -0x20(%rbp),%rax 0x000055555555517b <+34>: mov %rax,%rdi 0x000055555555517e <+37>: call 0x555555555050 => 0x0000555555555183 <+42>: mov -0x14(%rbp),%eax 0x0000555555555186 <+45>: mov %eax,-0x24(%rbp) 0x0000555555555189 <+48>: jmp 0x55555555519b 0x000055555555518b <+50>: subl $0x1,-0x24(%rbp) 0x000055555555518f <+54>: lea -0x20(%rbp),%rax 0x0000555555555193 <+58>: mov %rax,%rdi 0x0000555555555196 <+61>: call 0x555555555030 0x000055555555519b <+66>: cmpl $0x0,-0x24(%rbp) 0x000055555555519f <+70>: jns 0x55555555518b 0x00005555555551a1 <+72>: mov $0x2a,%eax 0x00005555555551a6 <+77>: mov -0x8(%rbp),%rdx 0x00005555555551aa <+81>: sub %fs:0x28,%rdx 0x00005555555551b3 <+90>: je 0x5555555551ba 0x00005555555551b5 <+92>: call 0x555555555040 <__stack_chk_fail@plt> 0x00005555555551ba <+97>: leave 0x00005555555551bb <+98>: ret End of assembler dump. (gdb) x/16xb 0x7fffffffeaa0 0x7fffffffeaa0: 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x7fffffffeaa8: 0x41 0x41 0x41 0x00 0x0a 0x00 0x00 0x00 ^^^^^^^^^^^^ cutting into slack bytes now // But not enough to overwrite the inp.cnt integer yet. (gdb) c Continuing. AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA AAAAAAAAAAA (gdb) r < A12 The program being debugged has been started already. Start it from the beginning? (y or n) y Starting program: /home/sergey/gets2 < A12 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555515d in main () Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 0 New value = 10 // <<-- normal again 0x0000555555555177 in main () (gdb) c Continuing. Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 10 New value = 0 // <<<----- UH-OH !!!! 0x00007ffff7c81f42 in gets () from /usr/lib/libc.so.6 (gdb) x/16xb 0x7fffffffeaa0 0x7fffffffeaa0: 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x7fffffffeaa8: 0x41 0x41 0x41 0x41 0x00 0x00 0x00 0x00 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ cnt is now overwritten, its lowest byte is now 0 // Now we see the exact mechanism of the mystery. It's a combination of // low-endian integer representation and gets() writing past the intended // end of inp.c[10] (gdb) r < A13 The program being debugged has been started already. Start it from the beginning? (y or n) y Starting program: /home/sergey/gets2 < A13 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555515d in main () Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 0 New value = 10 // normal, as expected 0x0000555555555177 in main () (gdb) c Continuing. Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 10 New value = 65 // <<---- overwriting with 0x41 = 65 0x00007ffff7d7dd90 in ?? () from /usr/lib/libc.so.6 (gdb) x/16xb 0x7fffffffeaa0 0x7fffffffeaa0: 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x7fffffffeaa8: 0x41 0x41 0x41 0x41 0x41 0x00 0x00 0x00 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // This explains 66 repetitions of the input string (gdb) r < A14 The program being debugged has been started already. Start it from the beginning? (y or n) y Starting program: /home/sergey/gets2 < A14 [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Breakpoint 1, 0x000055555555515d in main () Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 0 New value = 10 0x0000555555555177 in main () (gdb) c Continuing. Hardware watchpoint 2: *(0x7fffffffeac0-0x14) Old value = 10 New value = 16705 0x00007ffff7d7dd90 in ?? () from /usr/lib/libc.so.6 (gdb) x/16xb 0x7fffffffeaa0 0x7fffffffeaa0: 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x41 0x7fffffffeaa8: 0x41 0x41 0x41 0x41 0x41 0x41 0x00 0x00 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // ... and this explains 16705 // Mystery solved. Note that none of these goes far enough to overwrite the // canary at RBP-8. Only 'A'x100 does that in our example. // The stack frame canary scheme protects the saved RBP and the return address, // but serious violations of the intended program workflow can happen before // this scheme triggers (if at all). This is the essence of exploitation (in this case, // memory corruption-based exploitation). // So the darkest secret of the stack frame is that it's mostly imaginary. It's just memory, // and there's almost nothing that enforces our intended uses of its bytes and offsets // at the CPU instruction level. _Any compiled code allows many many executions that // were never meant by the original source code and yet are repeatable and stable._ // Designing languages, compilers, and runtime systems that are better at enforcing // programmer's intent without uneconomical cost and performance burdens remains // a wide-open research question. // It took about 30 years of developing various mathematical ideas to get to the point // that the code is simultaneously fast and a lot more expressive of intent. This // is the essence of PL as a field.