Python3.4实现的12306最新验证码识别

2015/11/12 81997
    import ssl  
    import json  
    from PIL import Image  
    import urllib  
    import re  
    import urllib.request as urllib2  
    # Rebind ssl.create_default_context to the non-verifying context factory
    # when this Python build exposes one; the intent appears to be skipping
    # certificate verification for the HTTPS captcha download.
    # NOTE(review): this is a process-wide change, and urllib's HTTPS handler may
    # obtain its context through ssl._create_default_https_context instead, which
    # this rebinding would not affect -- verify on the target Python version.
    if hasattr(ssl, '_create_unverified_context'):
        ssl.create_default_context = ssl._create_unverified_context
    # User-Agent header value sent with the Baidu requests in baidu_stu_lookup().
    UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
    # URL of the 12306 login captcha image that get_img() downloads.
    pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
    def get_img():
        """Download the captcha image from ``pic_url`` and open it with PIL.

        The raw response body is written to ``./tmp.jpg`` (overwriting any
        existing file) and the image is then opened from that file.

        Returns:
            The object returned by ``Image.open('./tmp.jpg')``.
        """
        # Close the HTTP response as soon as the body has been read instead of
        # leaving the connection open until it is garbage collected.
        with urllib2.urlopen(pic_url) as resp:
            raw = resp.read()
        with open('./tmp.jpg', 'wb') as fp:
            fp.write(raw)
        return Image.open('./tmp.jpg')
    def get_sub_img(im, x, y):
        """Crop one tile out of the captcha grid image.

        Args:
            im: Image-like object exposing ``crop((left, top, right, bottom))``.
            x: Column index of the tile, 0 to 3 inclusive.
            y: Row index of the tile, 0 to 2 inclusive.

        Returns:
            Whatever ``im.crop`` returns for the 67x67 box of tile ``(x, y)``.

        Raises:
            AssertionError: If ``x`` or ``y`` is outside the accepted range.
        """
        assert 0 <= x <= 3
        assert 0 <= y <= 2
        tile_size = 67  # edge length of the cropped square
        gap = 5  # distance skipped between neighbouring tiles
        x_origin = 5  # left edge of column 0
        y_origin = 41  # top edge of row 0
        left = x_origin + (tile_size + gap) * x
        top = y_origin + (tile_size + gap) * y
        return im.crop((left, top, left + tile_size, top + tile_size))
    def baidu_stu_lookup(im):
        """Query Baidu image search for ``im`` and return the extracted keywords.

        The image is saved to ``./query_temp_img.png``, its bytes are POSTed to
        the upload endpoint, and the body of that response is passed as the
        ``queryImageUrl`` parameter of the search page. The resulting HTML is
        handed to ``baidu_stu_html_extract``.

        Args:
            im: Image object exposing ``save(path)``.

        Returns:
            The string produced by ``baidu_stu_html_extract`` for the result page.
        """
        # Use the documented home of quote() rather than relying on the name
        # happening to be re-exported by urllib.request.
        from urllib.parse import quote

        url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
        im.save("./query_temp_img.png")
        # Read the encoded PNG back; the context manager closes the file handle.
        with open("./query_temp_img.png", 'rb') as fp:
            raw = fp.read()
        # The upload URL ends with "size=", so append the payload length.
        url = url + str(len(raw))
        req = urllib2.Request(url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
        with urllib2.urlopen(req) as upload_resp:
            resp_url = upload_resp.read()

        # resp_url is bytes; quote() accepts bytes and returns a str.
        url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + quote(resp_url)
        req = urllib2.Request(url, headers={'User-Agent': UA})
        with urllib2.urlopen(req) as resp:
            html = resp.read().decode()
        return baidu_stu_html_extract(html)
    def baidu_stu_html_extract(html):
        """Pull the keyword list out of a Baidu image-search result page.

        Finds the first ``keywords:'...'`` fragment in ``html``, un-escapes it
        into JSON and joins the ``keyword`` field of every entry with ``|``.

        Returns:
            The joined keywords, or ``'[UNKOWN]'`` when no fragment is present
            or the decoded list is empty.
        """
        found = re.search(r"keywords:'(.*?)'", html)
        if found is None:
            return '[UNKOWN]'
        # The fragment uses \x22 for double quotes and doubled backslashes;
        # undo both so the text parses as JSON.
        payload = found.group(1)
        payload = payload.replace('\\x22', '"')
        payload = payload.replace('\\\\', '\\')
        keywords = []
        for entry in json.loads(payload):
            keywords.append(entry['keyword'])
        if not keywords:
            return '[UNKOWN]'
        return '|'.join(keywords)
    if __name__ == '__main__':
        # Fetch one captcha, then look up each tile of its first two rows
        # (four columns each) and print the (row, column) index with the result.
        captcha = get_img()
        for row in range(2):
            for col in range(4):
                tile = get_sub_img(captcha, col, row)
                print((row, col), baidu_stu_lookup(tile))
改自 https://github.com/andelf/fuck12306/blob/master/fuck12306.py
Python 3.4 可用
代码片段