要登录豆瓣,首先必须获取验证码
1、获取验证码图片的链接,并将图片下载到本地
req=r'<img id="captcha_image" src="(.*?)" alt="captcha" class="captcha_image"/>'
yanzhenma=re.findall(req,html.text,re.S)
2、手动输入验证码,并将其添加到 data 中
x=input('请输入验证码:')
data['captcha-solution']=x
data['captcha-id']=captcha_id[0]
3、代码演示
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2018/3/15 20:20
# @Author : tomorrowli
import requests
import re
import os
import time
import random
from hashlib import md5
def geturl():
    """Log in to Douban, asking the user to solve the captcha when one is shown.

    Fetches the login page. If the page embeds a captcha image, the image is
    saved into the current working directory (named by the MD5 digest of its
    bytes), the user is prompted on stdin for the solution, and the solution
    plus the captcha id are added to the login form before it is submitted.

    Returns:
        None. The HTTP status code of the login POST and a success/failure
        message are printed to stdout.

    Raises:
        requests.RequestException: if a request fails or times out, or the
            captcha image download returns an HTTP error status.
        RuntimeError: if a captcha image is present on the page but the
            hidden ``captcha-id`` field cannot be found.
    """
    # Random 2-5 second pause before contacting the site
    # (presumably to look less like an automated client).
    time.sleep(random.randint(2, 5))

    url = 'https://www.douban.com/accounts/login'
    timeout = 10  # seconds; without it a stalled connection would hang forever
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'
    }
    data = {
        'source': 'index_nav',
        'form_email': '用户名',
        'form_password': '密码',
    }
    image_pattern = r'<img id="captcha_image" src="(.*?)" alt="captcha" class="captcha_image"/>'
    id_pattern = r'<input type="hidden" name="captcha-id" value="(.*?)"/>'

    # One session for the whole exchange: cookies set by the login page are
    # sent back with the POST, and every request carries the same headers
    # (the captcha image download previously went out without them).
    with requests.Session() as session:
        session.headers.update(headers)
        page = session.get(url, timeout=timeout)

        image_urls = re.findall(image_pattern, page.text, re.S)
        if image_urls:
            captcha_ids = re.findall(id_pattern, page.text, re.S)
            if not captcha_ids:
                raise RuntimeError(
                    'captcha image found but the captcha-id field is missing'
                )

            image = session.get(image_urls[0], timeout=timeout)
            image.raise_for_status()

            # Name the file after the MD5 of its content so the same image
            # is never written twice.
            file_name = '{0}.{1}'.format(md5(image.content).hexdigest(), 'jpg')
            file_path = os.path.join(os.getcwd(), file_name)
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(image.content)

            data['captcha-solution'] = input('请输入验证码:')
            data['captcha-id'] = captcha_ids[0]

        response = session.post(url, data=data, timeout=timeout)

    print(response.status_code)
    # NOTE(review): a 2xx status only shows the request was served; it does
    # not prove the credentials/captcha were accepted - confirm against the
    # site's actual response (e.g. redirect target or page content).
    if response.ok:
        print('登陆成功')
    else:
        print('登陆失败')
def main():
    """Script entry point: run the login flow once."""
    geturl()
# Run the login flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
结果展示