python3使用urllib抓取用户名密码登陆的网页

zhongranxu 2019-06-26

#python34
# -*- coding: utf-8 -*-

import http.cookiejar
import urllib.error
import urllib.parse
import urllib.request

LOGIN_URL = r'http://......'
get_url = 'http://.......'  # 利用cookie请求访问另一个网址

username=input('用户名:')
password=input('密码:')
values = {'username': username, 'password': password}

postdata = urllib.parse.urlencode(values).encode()

headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

cookie_filename = 'cookie_jar.txt'
cookie_jar = http.cookiejar.MozillaCookieJar(cookie_filename)
handler = urllib.request.HTTPCookieProcessor(cookie_jar)
opener = urllib.request.build_opener(handler)

request = urllib.request.Request(LOGIN_URL, postdata, headers)

try:
    response = opener.open(request)
    
    #将cookies保存到本地test.txt,便于读取
    cookies = list()
    for item in cookie_jar:
        cookies.append(item.name + '=' + item.value)
    print(cookies)
    with open('test.txt', 'w+', encoding='utf-8') as f:
        f.write(';'.join(cookies))
    
    #将cookies保存到本地cookie_jar.txt
    #cookie_jar.save(ignore_discard=True, ignore_expires=True)
except urllib.error.URLError as e:
    print(e.code, ':', e.reason)

#测试获取数据
get_request = urllib.request.Request(get_url)  
get_response = opener.open(get_request)  
print(get_response.read().decode())

相关推荐

jszy / 0评论 2017-01-27