;;; Emacs LISP is byte-compiled to the bytecode for a very simple virtual machine. This ;;; machine executes one function at a time and has a "stack" where it holds its ;;; arguments and data. In particular, the slots of the stack contain values such as ;;; fixnum integers, and references, e.g., to cons cells or symbols. The machine has no ;;; other memory besides the stack, so all values for VM's opcodes operate on either go ;;; on the stack directly, or are referenced from the stack slots. ;;; Detailed documentation of the VM is at https://rocky.github.io/elisp-bytecode.pdf ;;; Skim section 1, read section 2, and search this document for any bytecodes you encounter. ;;; ;;; For a short summary of bytecode, see https://nullprogram.com/blog/2014/01/04/ ;;; ;;; But most of all, disassemble and read any functions you can think of! (defun add-one (x) (+ x 1)) ; in class I called it 'add1', which just happens ; to be a VM bytecode that consumes the top of the ; stack (TOS) and replaces it with an incremented value. ; Here I use different names to avoid confusing the bytecode ; names with the function names. add-one (defun add-this (x y) (+ x y)) add-this (disassemble 'add-one) nil (disassemble 'add-this) nil ;;; An aside: In the class we looked at disassembly in a separate buffer, which is Emacs' ;;; default way of showing it. For this note, I want to paste the disassembly right into ;;; this buffer. So I used "M-x find-function" to look at the code of disassemble (do it), ;;; and saw that is calls (disassemble-internal object indent nil) to generate the text. ;;; So here goes a "disassemble-here" macro (see Lisp Macros in OnLisp) (defmacro disas (obj) `(disassemble-internal ,obj 2 nil)) disas ;;; When reading the bytecode, remember that the byte compiler makes sure to keep original argument ;;; so that it's available throughout the function. To act on the arguments, copies are made first, ;;; then bytecodes act on these copies. The original is always pushed out of the way. (disas 'add-one) doc: ... args: (arg1) ;; The stack is now [ arg1 ] // Here I put the top of the stack (TOS) on the left. 0 dup ;; [ arg1 arg1 ] // Value at TOS is duplicated (see above) 1 add1 ;; [ (arg1+1) arg1 ] // return value is placed on top of the stack 2 return ;; // nil (disas 'add-this) doc: ... ;; TOS S[1] S[2] .... args: (arg1 arg2) ;; [ args1 args2 ] 0 stack-ref 1 ;; [ args2 args1 args2 ] // Duplicate arg2 onto the top, from the 1st slot off the top (S[0] is TOS, S[1] is the 1st slot, etc. 1 stack-ref 1 ;; [ args1 args2 args1 args2 ] // ... and push arg1 onto the top next 2 plus ;; [ (args1+args2) args1 args2 ] // Consume two top slots, replace with their sum on top 3 return ;; usually, you reading is right if by the end of the function you are left with just the return value and the original arguments nil (defun mp (x) (cons x x)) mp (mp 3) (3 . 3) (disas 'mp) doc: ... args: (arg1) ;; [ args1 ] 0 dup ;; [ args1 args1 ] 1 stack-ref 1 ;; [ args1 args1 args1 ] 2 cons ;; [ (args1 . args1) args1 ] // reference to the dotted pair is the return value 3 return nil ;;; OK, we can now take on the naive non-tail-recursive factorial, n! ;;; Notice that the content of the stack are _all_ of the state that we need to know to continue computing, ;;; except for the symbols such as the function's own body referenced from the stack (defun fact-naive (n) (if (= n 0) 1 (* n (fact-naive (1- n))))) fact-naive (fact-naive 7) ;; recall 1!=1, 2!=2, 3!=6, 4!=24, 5!=120, 6!=720 5040 (disas 'fact-naive) ;; Here I am going to use "n" instead of args1 doc: ... args: (arg1) ;; [ n ] 0 dup ;; [ n n ] 1 constant 0 ;; [ 0 n n ] // disassembler shows the actual value pushed, the actual bytecode ;; // encodes the index into the vector of constants created by ;; // the byte-compiler alongside the code. 2 eqlsign ;; [ (= 0 n) n ] // [ t 0 ] or [ nil n ] for n > 0 3 goto-if-nil 1 ;; [ n ] // pops off the value on top of the stack (t or nil) 6 constant 1 ;; [ 1 0 ] if we got here, n == 0 7 return ;; returns 1 for (fact-naive 0) 8:1 dup ;; [ n n ] if we got here, n > 0 9 constant fact-naive ;; [ fact-naive n n ] 10 stack-ref 2 ;; [ n fact-naive n n ] // the original argument is copied onto TOS 11 sub1 ;; [ (n-1) fact-naive n n ] 12 call 1 ;; [ (n-1)! n n ] // the call to fact-naive of (n-1) is ;; // executed on a different copy of VM, then returns here 13 mult ;; [ n*(n-1)! n ] // n! is now at TOS 14 return nil ;; This is nice, but "call" causes many copies of the per-invocation environments to be created then destroyed. ;; Will tail recursion help here? (defun fact (n acc) (if (= n 0) acc (fact (1- n) (* n acc)))) fact ;; Follow this through a computation (disas 'fact) doc: ... args: (arg1 arg2) 0 stack-ref 1 1 constant 0 2 eqlsign 3 goto-if-nil 1 6 return 7:1 constant fact 8 stack-ref 2 9 sub1 10 stack-ref 3 11 stack-ref 3 12 mult 13 call 2 ;; <<-- we still incur the heavy cost of creating copies of the environment 14 return nil ;; So Emacs isn't as great about doing tail call optimization (TCO) by default as SBCL. But there ;; is a dedicated TCO primitive: named-let . Essentially, the use of named-let signals to Emacs ;; that tail-call optimization is intended. ;; ;; See https://0branch.com/notes/tco-cl.html for more. (defun fact-tc (nn) "This computes n! tail-recursively" ; this time we'll follow Emacs style and provide a documentation one-liner (named-let fact-acc ((n nn) (acc 1)) (if (= n 0) acc (fact-acc (1- n) (* n acc))))) fact-tc ;; MOST IMPORTANTLY, we see no CALLs, just a JMP at the end. So this function will only use its own stack, ;; won't replicate its environment for each "n" unlike the above fact-naive with CALL to self. Yay line 24 ! (disas 'fact-tc) doc: This computes n! tail-recursively ... args: (arg1) 0 dup 1 constant 1 2 constant nil 3:1 stack-ref 2 4 stack-ref 2 5 stack-ref 4 6 constant 0 7 eqlsign 8 goto-if-nil 2 11 stack-ref 3 12 return 13:2 stack-ref 4 14 sub1 15 stack-set 5 17 stack-ref 1 18 stack-ref 4 19 mult 20 stack-set 4 22 discardN 2 24 goto 1 ;; NOTE: at this point we LOOP back to line 3:1 (label 1), not CALL! nil ;;; To understand these iterations, I stepped through them symbolically, starting with x and y ;;; as the arguments to line 3:1. This trick is referred to as "symbolic execution" when ;;; properly automated. By tracking values like this I see what happens to the stack at the ;;; end of a pass through the loop: (disas 'fact-tc) doc: This computes n! tail-recursively ... args: (arg1) ;; [ n ] 0 dup ;; [ n n ] 1 constant 1 ;; [ 1 n n ] // acc is 1; noting it so will help us understand the data flow 2 constant nil ;; [ nil x y n ] // x is acc , y is n on the first iteration 3:1 stack-ref 2 ;; [ y nil x y n ] // this line will be jumped to by "goto 1" below 4 stack-ref 2 ;; [ x y nil x y n ] 5 stack-ref 4 ;; [ y x y nil x y n ] 6 constant 0 ;; [ 0 y x y nil x y n ] 7 eqlsign ;; [ (= 0 y) x y nil x y n ] 8 goto-if-nil 2 ;; consume TOS, fall through for y==0 , jump to label 2 (line 13:2) otherwise 11 stack-ref 3 ;; [ x x y nil x y n ] // this copy seems redundant, but eLisp byte-compiler just copies relevant values, just in case the original may be needed 12 return ;; returns x, i.e., acc 13:2 stack-ref 4 ;; [ y x n nil x y n ] 14 sub1 ;; [ (y-1) x y nil x y n ] ;; careful here, this is our first appearance of "stack-set": TOS gets written to i-th slot and popped 15 stack-set 5 ;; [ x y nil x (y-1) n ] 17 stack-ref 1 ;; [ y x y nil x (y-1) n ] 18 stack-ref 4 ;; [ x y x y nil x (y-1) n ] 19 mult ;; [ x*y x y nil x (y-1) n ] // TOS is now n*acc 20 stack-set 4 ;; [ x y nil x*y (y-1) n ] 22 discardN 2 ;; [ nil x*y (y-1) n ] 24 goto 1 ;; NOTE: at this point we LOOP back to line 3:1 (label 1), not CALL! nil ;;; From line 2 to line 24 [ nil x y n ] becomes [ nil x*y (y-1) n ] . Repeating inductively: ;;; [ nil acc n n ] -> [ nil n*acc (n-1) n ] -> [ nil n*(n-1)*acc (n-2) n ] -> [ nil n*(n-1)*(n-2) (n-3) n ] -> ... ;;; until we hit the base case of y reaching 0 ;;; We almost have the idea for the inductive proof of correctness for our loop! (byte-compile 'fact-tc) #[257 "\211\300\301^B^B^D\302U\203^M^@^C\207^DS\262^E^A^D_\262^D\266^B\202^C^@" [1 nil 0] 8 "This computes n! tail-recursively (fn NN)"] ^^^ argument specification ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ code ^^^^^^^^^ constants vector ;; Indeed, this function delivers :) (fact-tc 2000) 3316275092450633241175393380576324038281117208105780394571935437060380779056008224002732308597325922554...0000 ;; This is the best we can do with fact: (fact 794 1) 2997133322109004082939568340752004004922555931794516714954758232475005272767374800649488762358111526742...0000 ;; 795 already fails. ;;; ====================[ Behold the Closures ]============================ (defun make-counter () (let ((cnt 0)) (lambda () (setq cnt (1+ cnt)) cnt))) make-counter (setf (symbol-function 'a) (make-counter)) (closure ((cnt . 0)) nil (setq cnt (1+ cnt)) cnt) (setf (symbol-function 'b) (make-counter)) (closure ((cnt . 0)) nil (setq cnt (1+ cnt)) cnt) (setf (symbol-function 'c) (make-counter)) (closure ((cnt . 0)) nil (setq cnt (1+ cnt)) cnt) (c) 1 (c) 2 (a) 6 (a) 1 (a) 2 (a) 3 (a) 4 (b) 1 (b) 2 (a) 5 ;;; Look at the disassembly of the following to see how simple LETs are treated: (disassemble (lambda (y) (let ((x (fact 5))) (* x y)))) nil (disassemble (lambda (y) (let ((a 1) (b 2) (c 3) (d 4) (e 5)) (* a b c d e)))) nil (funcall (lambda (y) (let ((a 1) (b 2) (c 3) (d 4) (e 5)) (apply #'* (mapcar #'fact-tc (list a b c d e))))) 'a) 34560