source: pwntcha/textlinkbrokers/pwntcha-overture.pl @ 500

Last change on this file since 500 was 500, checked in by Sam Hocevar, 14 years ago
  • textlinkbrokers job
  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 8.7 KB
Line 
1#!/usr/bin/perl
2
3# pwntcha-overture.pl: a decoder for overture.com Captchas.
4# Usage: pwntcha-overture.pl <image_name>
5#
6# Copyright (c) Sam Hocevar <sam@hocevar.net>
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17# 3. The name of the author may not be used to endorse or promote products
18#    derived from this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31use strict;
32use warnings;
33
34use Image::Magick;
35
# Load our image. The file name is the first command-line argument; we only
# require it to be defined and non-empty, so that a file literally named
# "0" is still accepted.
die "Usage: $0 <image_name>" unless defined $ARGV[0] and length $ARGV[0];
my $image = Image::Magick->new;
# Any true value returned by Read() is treated as a failure.
die "Could not load image `".$ARGV[0]."'" if $image->Read($ARGV[0]);

# Retrieve the image size and check that it is the expected size (100x35).
# The rest of the script relies on these dimensions.
my ($w, $h) = $image->Get('width', 'height');
die "Wrong image size (".$w."x".$h.")" if $w != 100 or $h != 35;
44
# The source image is a JPEG, so "white" and "black" are only approximately
# 0xffff and 0x0000. We threshold the first channel of every pixel at
# 0x8000 and build a 2D array, indexed as $pix[$x][$y], holding 1 for
# (rather) black pixels and 0 for (rather) white ones.
# While doing so we also count the black pixels of every row (@hlines) and
# of every column (@vlines); those counts are used later on.
my @pix;
my @hlines = (0) x $h;
my @vlines = (0) x $w;
for my $y (0 .. $h - 1) {
    for my $x (0 .. $w - 1) {
        # Get() returns the channel values as a comma-separated string;
        # only the first one is looked at.
        my ($level) = split(/,/, $image->Get('Pixel['.$x.','.$y.']'));
        my $black = $level < 0x8000 ? 1 : 0;
        $pix[$x][$y] = $black;
        next unless $black;
        $hlines[$y]++;
        $vlines[$x]++;
    }
}
64
# Erase vertical black lines, if any: these are the columns whose black
# pixel count equals the image height. Every pixel of such a column is
# black, so each row loses exactly one black pixel.
for my $col (grep { $vlines[$_] == $h } 0 .. $w - 1) {
    for my $row (0 .. $h - 1) {
        $pix[$col][$row] = 0;
        $hlines[$row]--;
    }
    $vlines[$col] = 0;
}
75
# Erase horizontal black lines. They originally had 100 black pixels, but
# the vertical lines removed above (around 10 of them) took some of these
# away, so a row is considered a line as soon as it has at least
# 100 - 15 black pixels.
foreach my $row (0 .. $h - 1) {
    if ($hlines[$row] >= $w - 15) {
        # Only the pixels that are still black need to be cleared and
        # subtracted from their column count.
        my @black = grep { $pix[$_][$row] } 0 .. $w - 1;
        foreach my $col (@black) {
            $pix[$col][$row] = 0;
            $vlines[$col]--;
        }
        $hlines[$row] = 0 if @black;
    }
}
88
89## XXX DEBUG XXX
90#print "/* XPM */ static char *xpm[] = {\n";
91#print "\"".$w." ".$h." 35 2 1\",\n";
92#print "\". c white\",\n";
93#print "\"@ c black\",\n";
94#for ($y = 0; $y < $h; $y++) {
95#    print "\"";
96#    for ($x = 0; $x < $w; $x++) { print $pix[$x][$y] ? "@" : "."; }
97#    print "\",\n";
98#}
99#print "};\n";
100## XXX DEBUG XXX
101
# We now try to segment our image to detect the glyphs, simply by looking
# at empty columns. One empty column may mean a space between glyphs, but
# also an empty perturbation line. So we search for at least two
# consecutive empty columns.
#
# @xstart holds the first column of each glyph and @xstop the column just
# past its last one. The two arrays are initialised separately on purpose:
# in a combined "my (@a, @b) = (...)" the first array swallows the whole
# list and the second one is left empty.
my @xstart = (0) x 4;
my @xstop = (0) x 4;
my $glyph = 0;
my $in_glyph = 0;
foreach my $x (0 .. $w - 1) {
    if (!$in_glyph) {
        # A non-empty column opens a new glyph. Note that a glyph opened
        # on the very last column is never closed, hence never counted.
        if ($vlines[$x]) {
            $in_glyph = 1;
            $xstart[$glyph] = $x;
        }
    } elsif ($x == $w - 1) {
        # Right edge of the image: close the current glyph, keeping the
        # last column only if it still contains black pixels.
        $in_glyph = 0;
        $xstop[$glyph] = $x + ($vlines[$x] ? 1 : 0);
        $glyph++;
    } elsif (!$vlines[$x] and !$vlines[$x + 1]) {
        # Two consecutive empty columns: the glyph ends here.
        $in_glyph = 0;
        $xstop[$glyph] = $x;
        $glyph++;
    }
}
127
# More than 4 glyphs means the image is not in the expected format, and
# there is nothing we can do about it. The same goes for an image in which
# no glyph at all was found.
if ($glyph == 0 or $glyph > 4) {
    die "Found $glyph glyphs, expected 4";
}
131
# If we found fewer than 4 glyphs, it probably means that a few letters
# were too close to each other and only had a one-pixel space between
# them. We try to recover by splitting big glyphs. Since the glyphs in the
# original font are between 13 and 19 pixels wide, any glyph in our image
# wider than 20 pixels cannot be a single character and must be split.
if ($glyph > 0 and $glyph < 4) {
    # Walk from the rightmost glyph to the leftmost one, and stop as soon
    # as we have 4 glyphs: splitting any further would push the last glyph
    # out of our 4 slots and make the glyph count exceed 4.
    for (my $g = $glyph - 1; $g >= 0 and $glyph < 4; $g--) {
        next if $xstop[$g] - $xstart[$g] <= 20;

        # Middle column of the glyph, truncated so that every coordinate
        # we store stays an integer.
        my $mean = int(($xstart[$g] + $xstop[$g]) / 2);

        # Look for an empty column as close to the middle as possible,
        # at most 5 columns away on either side.
        foreach my $offset (0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5) {
            my $cut = $mean + $offset;
            next if $vlines[$cut];

            # Make room for the new glyph by shifting the ones on the
            # right of glyph $g by one slot.
            for (my $tmp = 3; $tmp > $g + 1; $tmp--) {
                $xstart[$tmp] = $xstart[$tmp - 1];
                $xstop[$tmp] = $xstop[$tmp - 1];
            }

            # The right half starts just after the empty column, the left
            # half ends on it.
            $xstop[$g + 1] = $xstop[$g];
            $xstart[$g + 1] = $cut + 1;
            $xstop[$g] = $cut;
            $glyph++;
            last;
        }
    }
}

# If we still have fewer than 4 glyphs, just give up.
if ($glyph < 4) {
    print "????\n";
    exit 0;
}
164
# To speed up pattern matching later on, record for each glyph the first
# row (from the top) that has a black pixel within the glyph's columns.
# A glyph without any black pixel keeps a top row of 0.
my @ystart = (0) x 4;
foreach my $g (0 .. 3) {
    my @cols = $xstart[$g] .. $xstop[$g] - 1;
    ROW:
    foreach my $y (0 .. $h - 1) {
        next ROW unless grep { $pix[$_][$y] } @cols;
        $ystart[$g] = $y;
        last ROW;
    }
}
177
# The perturbation lines we removed may have cropped part of our glyphs,
# so each glyph's bounds are grown by one pixel: to the left, to the right
# and to the top, without leaving the image.
foreach my $g (0 .. 3) {
    $xstart[$g] -= 1 if $xstart[$g];
    $xstop[$g] += 1 unless $xstop[$g] >= $w - 1;
    $ystart[$g] -= 1 if $ystart[$g];
}
185
# Load our font. $image is reused for it: the captcha pixels have already
# been copied into @pix. The font is looked up as "font.png" relative to
# the current directory, and any true value returned by Read() is treated
# as a failure.
$image = Image::Magick->new;
die "Could not load font `font.png'" if $image->Read("font.png");
189
# Convert our font image to a 2D array to speed up things. We also store
# the horizontal extent of each of the 29 font characters for faster
# access: $fxstart[$ch] is its first column and $fxstop[$ch] the empty
# column that follows it.
#
# The two arrays are initialised separately on purpose: in a combined
# "my (@a, @b) = (...)" the first array swallows the whole list and the
# second one is left empty.
my @fxstart = (0) x 29;
my @fxstop = (0) x 29;
my @font;
my $ch = 0;
# The font image is scanned as 506 columns of 20 rows.
foreach my $x (0 .. 506 - 1) {
    my $line_empty = 1;
    foreach my $y (0 .. 20 - 1) {
        # Same thresholding as for the captcha: 1 is black, 0 is white.
        my (@p) = split(/,/, $image->Get('Pixel['.$x.','.$y.']'));
        if ($p[0] < 0x8000) {
            $line_empty = 0;
            $font[$x][$y] = 1;
        } else {
            $font[$x][$y] = 0;
        }
    }
    # Every empty column ends the current character; the next character
    # starts on the following column.
    if ($line_empty) {
        $fxstop[$ch] = $x;
        $fxstart[$ch + 1] = $x + 1 if $ch < 28;
        $ch++;
    }
}
212
213## XXX DEBUG XXX
214#for ($y = 0; $y < 20; $y++) {
215#    for ($x = 0; $x < 506; $x++) {
216#        print $font[$x][$y] ? "@" : ".";
217#    }
218#    print "\n";
219#}
220## XXX DEBUG XXX
221
# Now try to match each of our glyphs with the font characters. Given the
# simplicity of the font, simple brute force pattern matching is enough:
# for each glyph we try every font character in order, at 3x3 different
# offsets from the glyph's top-left corner, and keep the first character
# that fits. One character (or "?" if nothing fits) is printed per glyph.
my $charset = "123456789BCDFGHJKLMNPRSTVWXYZ";
foreach my $g (0 .. 3) {
    my $found = 0;
    CHAR:
    foreach my $c (0 .. 28) {
        # The first 9 characters are compared over 20 rows, the others
        # over 19 rows only.
        my $rows = $c < 9 ? 20 : 19;
        foreach my $y ($ystart[$g] .. $ystart[$g] + 2) {
            foreach my $x ($xstart[$g] .. $xstart[$g] + 2) {
                my $ok = 1;
                PIXEL:
                foreach my $dy (0 .. $rows - 1) {
                    foreach my $dx (0 .. $fxstop[$c] - $fxstart[$c] - 1) {
                        # Black font pixels are not checked at all (we
                        # could check something here, too).
                        next if $font[$fxstart[$c] + $dx][$dy];
                        # A pixel that is white in the font must not be
                        # black in our image. This test should be more
                        # than enough to crack the captcha.
                        next unless $pix[$x + $dx][$y + $dy];
                        $ok = 0;
                        last PIXEL;
                    }
                }
                next unless $ok;
                $found = substr($charset, $c, 1);
                last CHAR;
            }
        }
    }
    print $found ? $found : "?";
}
print "\n";

# Finished! Everything should have been fine.
exit 0;
258
Note: See TracBrowser for help on using the repository browser.