用正则表达式来爬取某论坛的邮箱地址

代码演示:

import re

import requests
def fand_email(url, counts):
    """Download a page, save every e-mail address found in it, and count them.

    Each match is appended as its own line to ``emal.txt`` in the current
    working directory and echoed to standard output.

    Args:
        url: Address of the page to download.
        counts: Running total that this page's match count is added to.

    Returns:
        ``counts`` plus the number of addresses matched on this page.

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            server does not respond within the timeout.
    """
    # requests applies no timeout by default, so an unresponsive server would
    # block the whole crawl forever; fail after 10 seconds instead.
    response = requests.get(url, timeout=10)
    email_re = re.compile(r'[0-9a-zA-Z._]+@[0-9a-zA-Z._]+\.[0-9a-zA-Z._]+')
    emails = email_re.findall(response.text)
    # Append-only: the file is never read back here, so 'a' is sufficient.
    with open('emal.txt', 'a') as f:
        for email in emails:
            f.write(email + '\n')
            print(email)
    return counts + len(emails)
def main():
    """Scan pages 1 through 31 of the thread and print the total match count."""
    page_url = 'http://tieba.baidu.com/p/2314539885?pn=%s'
    # A starting count of 0 makes each call return only that page's matches,
    # so summing the results yields the overall total.
    total = sum(fand_email(page_url % page, 0) for page in range(1, 32))
    print(total)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
-------------本文结束感谢您的阅读-------------
坚持原创技术分享,您的支持将鼓励我继续创作!