16-bit holidays!
What could be better to spice up your winter holidays than some 16 bit x86 assembly? As an exercise for removing the rust from my assembly-fu, I wrote a small Base64 decoder purely in 8086 assembly code:
- it decodes a well-encoded message 4 bytes at a time and prints 3 characters to the screen;
- it handles padding `=`s;
- it does not handle faulty encodings;
- should use opcodes from the original 8086 only (unless I got it wrong);
- real mode code, meant to be embedded into a Master Boot Record;
- video output is done using the BIOS via `int 10h`.
How to assemble
You’ll need the Flat Assembler (`fasm`) to assemble the program.
With that installed, just run:
fasm base64.s
to get `base64.bin` as output. The file is already formatted to look
like a valid MBR: it’s 512 bytes long and ends with `55 AA`, so it can
be run directly using an emulator like `bochs` or `qemu`.
How to run
Once compiled, the simplest way to try it out is to have `qemu` run it
as if it were a floppy image. It is obviously too small to be a full
floppy, but `qemu` does not complain and we do not try to read additional
sectors:
qemu-system-x86_64 -fda base64.bin
What does it print? Well, why don’t you try it for yourself? But if you are in a hurry, there is a picture of it at the end of the page.
Of course, you can change the message and rebuild.
The code
; A base64 decoder, written in 8086 assembly language.
; Assemble using the Flat Assembler (https://flatassembler.net/)
; Emit a raw binary file, and arrange for labels to start at 0.
; Offset 0 pairs with segment 07C0h, which the entry code loads into
; CS, DS and ES, so label offsets resolve to the bytes loaded at 0x7C00.
format binary
org 0
; Entry point.
; The BIOS places this sector at linear address 0x7C00 (so the program
; occupies [0x7C00, 0x7DFF]) and jumps to it, but the CS:IP pair it uses is
; not fixed: 07C0:0000, 0000:7C00 and other equivalent forms are all possible.
; The code below therefore loads every segment register itself and reloads CS
; through a far return, so that offsets agree with "org 0".
LOAD_SEG   = 07C0h              ; segment whose offset 0 is linear 0x7C00
STACK_SEG  = 07E0h              ; segment starting right after the boot sector
STACK_SIZE = 0200h              ; 512 bytes of stack

start:
        cld                     ; string instructions (lodsb/stosb) count upwards

        ; Stack: SS:SP = 07E0:0200, growing down through linear [0x7E00, 0x8000).
        ; Interrupts stay off while SS and SP are being changed.
        cli
        mov     ax, STACK_SEG
        mov     ss, ax
        mov     sp, STACK_SIZE
        sti

        ; Data segments point at the program itself.
        mov     ax, LOAD_SEG
        mov     ds, ax
        mov     es, ax

        ; Far-return to LOAD_SEG:@f, which normalizes CS:IP.
        push    ax              ; new CS
        mov     ax, @f
        push    ax              ; new IP
        retf
@@:
        mov     si, msg         ; SI = read cursor into the encoded message
; "chunk" processes up to 4 bytes of base64 data, yielding up to 3 characters.
;
; In:    SI = address of the next encoded character (DS-relative)
; Out:   c1..c4 filled with the 6-bit values read; c3/c4 left at 0FFh when
;        the chunk ended early; SI advanced past the characters consumed
; Uses:  AL, BX (table index, BH kept 0), CX (characters left), DI (write
;        cursor into c1..c4, ES-relative)
;
; A chunk ends early on '=' (padding) or on the message's 0 terminator, so
; input whose trailing '=' characters were omitted is decoded as well.
; A chunk holding a single character is still a faulty encoding and is not
; diagnosed.
chunk:
        ; End of string exactly on a chunk boundary?
        cmp     byte [si], 0
        je      endloop

        xor     bx, bx                  ; BH must be 0 for the table lookup
        mov     cx, 4
        mov     di, c1
        ; Initially mark c3 and c4 as padded
        mov     word [c3], 0FFFFh

b64Char:
        lodsb
        ; If we read a '=', the chunk is padded and there
        ; are no more bits to read.
        cmp     al, '='
        je      printer
        ; Same if the terminator shows up inside the chunk: without this
        ; check the 0 byte would be used as a table index and would read
        ; memory below chartab.
        test    al, al
        jz      printer
        mov     bl, al
        mov     al, [bx + base64]       ; ASCII code -> 6-bit value
        stosb
        loop    b64Char
; "printer" turns the 6-bit values held in c1..c4 into 1 to 3 output bytes.
; Falling through from the b64Char loop means all four values were stored;
; arriving by a jump out of that loop means the chunk was cut short, in which
; case c3 and/or c4 still hold the 0FFh marker written before the loop.
;
; Bit layout (O = clear bit, I = set bit, 1/2/3 = decoded char the bit
; belongs to):
;   c1 = OO111111
;   c2 = OO112222
;   c3 = OO222233 | IIIIIIII (padding)
;   c4 = OO333333 | IIIIIIII (padding)
;
; Shifts go through CL because the 8086 only shifts by 1 or by CL.
; print_char preserves CX and overwrites AH.
printer:
        ; Decoded character 1 = c1 bits 5..0 followed by c2 bits 5..4
        mov     al, [c1]
        mov     cl, 2
        shl     al, cl
        mov     ah, [c2]
        mov     cl, 4
        shr     ah, cl
        and     ah, 03h
        or      al, ah
        call    print_char

        ; Decoded character 2 = c2 bits 3..0 followed by c3 bits 5..2,
        ; but only if we have a c3
        test    byte [c3], 80h
        jnz     endloop
        mov     al, [c2]
        mov     cl, 4
        shl     al, cl
        mov     ah, [c3]
        mov     cl, 2
        shr     ah, cl
        or      al, ah
        call    print_char

        ; Decoded character 3 = c3 bits 1..0 followed by c4 bits 5..0,
        ; but only if we have a c4
        test    byte [c4], 80h
        jnz     endloop
        mov     al, [c3]
        mov     cl, 6
        shl     al, cl
        mov     ah, [c4]
        and     ah, 3Fh
        or      al, ah
        call    print_char

        jmp     chunk
endloop:
; Decoding is over (end of message, or a chunk that ended early): idle forever.
; Interrupts were enabled at startup, so an interrupt can wake the CPU from
; hlt; the jump sends it straight back to sleep.
hlt
jmp endloop
; print_char: write one character to the screen through the BIOS.
; In:    AL = character to print
; Out:   nothing
; Clobb: AH (set to the BIOS function number), BH (set to 0)
;        CX is zeroed for the call but saved and restored around it.
print_char:
        push    cx
        mov     ah, 0Eh                 ; INT 10h function 0Eh (teletype output)
        xor     cx, cx
        xor     bh, bh                  ; BH = 0 (display page, per BIOS docs)
        int     10h
        pop     cx
        retn
; Message to decode
; Base64 text followed by a 0 byte. The decoder looks for that 0 byte at the
; start of every 4-character chunk, and stops reading a chunk at a '='.
msg db "SGFwcHkgaG9saWRheXMhISE=", 0
; Lookup table from ASCII code to 6-bit base64 value.
; Only the span '+' (43) .. 'z' (122) is stored. Codes inside that span that
; are not base64 digits are reserved gaps, sized here by character arithmetic.
chartab db 62                                                   ; '+'
        rb '/' - '+' - 1                                        ; gap: , - .
        db 63                                                   ; '/'
        db 52, 53, 54, 55, 56, 57, 58, 59, 60, 61               ; '0'..'9'
        rb 'A' - '9' - 1                                        ; gap: ':'..'@'
        db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12             ; 'A'..'M'
        db 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25   ; 'N'..'Z'
        rb 'a' - 'Z' - 1                                        ; gap: '['..'`'
        db 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38   ; 'a'..'m'
        db 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51   ; 'n'..'z'

; The table starts at '+' rather than at code 0, so "base64" is a byte label
; placed '+' bytes before chartab: [base64 + ascii_code] then addresses the
; entry for ascii_code. The label itself emits no data.
label base64 byte at chartab - '+'
; Scratch buffer for the chunk being decoded: one 6-bit value per byte, in
; input order. c3 and c4 are preset to 0FFh before each chunk, so a set top
; bit means "no character here" (padding).
; Kept word-aligned because the decoder uses 16-bit accesses on this buffer.
align 2
c1 rb 1
c2 rb 1
c3 rb 1
c4 rb 1
; Pad the program up to offset 510, then append the two-byte MBR signature so
; the 512-byte image is accepted as bootable by the BIOS.
; The signature is written as one little-endian word: bytes 55h, AAh.
rb 510 - $
dw 0AA55h