# Script 1: first, read the links from the website and save them to a txt file.
import re
import urllib.request
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from multiprocessing import Pool
# One Chrome session is started as soon as this module is imported; the
# scraping functions in this file drive it through this module-level name.
browser = webdriver.Chrome()
# Explicit-wait helper bound to that session with a timeout of 10.
# NOTE(review): `wait` is not referenced by any code visible in this file.
wait=WebDriverWait(browser, 10)
def search(url, next_title, max_attempts=3):
    """Load a player page and append each stream link found on it to lianjie.txt.

    The page source is scanned for ``<span style="" id="streamurj">`` elements.
    Each captured value is appended to ``https://openload.co/stream/``; the
    resulting link is printed and written as one line of ``lianjie.txt``.

    Args:
        url: Address of the page to load in the module-level ``browser``.
        next_title: Title handed over by the caller; not used by this function.
        max_attempts: How many times to try loading the page when Selenium
            raises ``TimeoutException`` before giving up on it.

    Returns:
        None.
    """
    for _ in range(max_attempts):
        try:
            browser.get(url)
            html = browser.page_source
        except TimeoutException:
            # Retry in a bounded loop; recursing on every timeout would end in
            # RecursionError for a page that never loads.
            continue
        break
    else:
        print('search: giving up after %d timed-out attempts: %s' % (max_attempts, url))
        return None

    tokens = re.findall(r'<span style="" id="streamurj">(.*?)</span>', html)
    if not tokens:
        # Nothing to record; do not touch the output file.
        return None

    # Open the output file once for all links; the with-block closes it.
    with open('lianjie.txt', 'a+') as out:
        for token in tokens:
            link = 'https://openload.co/stream/' + token
            print(link)
            out.write(link + '\n')
    return None
def load(url, title):
    """Download ``url`` to ``mp4/<title>.mp4`` under the current working directory.

    Args:
        url: Address of the file to fetch with ``urllib.request.urlretrieve``.
        title: Base name (without extension) of the file to write.

    Returns:
        None.
    """
    # Local import: the file's import header does not bring in os.
    import os

    target = 'mp4/%s.mp4' % title
    print('正在下载', url)
    # urlretrieve does not create missing directories, so make sure the
    # destination folder exists before writing into it.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    urllib.request.urlretrieve(url, target)
    print('下载成功', title)
def index_page(url, max_attempts=3):
    """Load a listing page and hand every linked entry to ``next_page``.

    Each ``href`` captured from a ``<td class="mytd-padding"><a href="...">``
    cell is split on ``'['``: the part before the first ``'['`` is appended to
    ``http://ourjav.com/`` to form the entry URL, and the part after the last
    ``'['`` is passed along as the title.

    Args:
        url: Address of the listing page to load in the module-level ``browser``.
        max_attempts: How many times to try loading the page when Selenium
            raises ``TimeoutException`` before giving up on it.

    Returns:
        None.
    """
    for _ in range(max_attempts):
        try:
            browser.get(url)
            html = browser.page_source
        except TimeoutException:
            # Bounded retry instead of recursing on every timeout, which would
            # end in RecursionError for a page that never loads.
            continue
        break
    else:
        print('index_page: giving up after %d timed-out attempts: %s' % (max_attempts, url))
        return None

    hrefs = re.findall(r'<td class="mytd-padding"><a href="(.*?)">', html)
    for href in hrefs:
        # Split once and reuse both ends of the result.
        pieces = href.split('[')
        next_url = 'http://ourjav.com/' + pieces[0]
        next_title = pieces[-1]
        next_page(next_url, next_title)
    return None
def next_page(url, next_title, max_attempts=3):
    """Load an entry page and pass every embedded player URL to ``search``.

    The page source is scanned for the ``flash_game_object`` iframe; each
    captured ``src`` value is forwarded to ``search`` together with
    ``next_title``.

    Args:
        url: Address of the entry page to load in the module-level ``browser``.
        next_title: Title forwarded unchanged to ``search``.
        max_attempts: How many times to try loading the page when Selenium
            raises ``TimeoutException`` before giving up on it.

    Returns:
        None.
    """
    for _ in range(max_attempts):
        try:
            browser.get(url)
            html = browser.page_source
        except TimeoutException:
            # Bounded retry instead of recursing on every timeout, which would
            # end in RecursionError for a page that never loads.
            continue
        break
    else:
        print('next_page: giving up after %d timed-out attempts: %s' % (max_attempts, url))
        return None

    iframe_pattern = r'<iframe id="flash_game_object" name="flash_game_object" frameborder="no" scrolling="no" width="650" height="500" src="(.*?)" allowfullscreen="true" webkitallowfullscreen="true" mozallowfullscreen="true"></iframe>'
    # re.S lets the captured src span line breaks in the page source.
    for player_url in re.findall(iframe_pattern, html, re.S):
        search(player_url, next_title)
    return None
def read_load(limit=5):
    """Open the first ``limit`` links stored in lianjie.txt in the browser.

    Each non-empty line of ``lianjie.txt`` is treated as one URL: it is
    printed and loaded in the module-level ``browser``. After the loop the
    number of links that were opened is printed.

    Args:
        limit: Maximum number of links to open (defaults to 5).

    Returns:
        None.
    """
    # Read everything up front so the file is closed before any page loads.
    with open("lianjie.txt", "r") as link_file:
        entries = link_file.read().splitlines()

    opened = 0
    for entry in entries:
        if opened >= limit:
            break
        link = entry.strip()
        if not link:
            # Blank lines are not URLs; skip them instead of passing "" to the browser.
            continue
        print(link)
        # The stripped link is used so no trailing newline ends up in the URL.
        browser.get(link)
        opened += 1
    print(opened)
def main(offset):
    """Crawl one listing page; ``offset`` fills the ``page`` query parameter."""
    query = 'index.php?&page=%s' % offset
    listing_url = 'http://ourjav.com/' + query
    index_page(listing_url)
if __name__ == '__main__':
    # Listing pages 1..10, one task per page.
    pages = range(1, 11)
    try:
        # The with-block releases the worker processes once map() has returned.
        with Pool() as pool:
            pool.map(main, pages)
    finally:
        # End this process's Chrome session so it does not outlive the script.
        browser.quit()
# Script 2: next, read the URLs from the txt file and download them.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
# One Chrome session is started as soon as this module is imported;
# read_load() drives it through this module-level name.
browser = webdriver.Chrome()
# Explicit-wait helper bound to that session with a timeout of 10.
# NOTE(review): `wait` is not referenced by any code visible in this file.
wait=WebDriverWait(browser, 10)
def read_load(limit=5):
    """Open the first ``limit`` links stored in lianjie.txt in the browser.

    Each non-empty line of ``lianjie.txt`` is treated as one URL: it is
    printed and loaded in the module-level ``browser``. After the loop the
    number of links that were opened is printed.

    Args:
        limit: Maximum number of links to open (defaults to 5).

    Returns:
        None.
    """
    # Read everything up front so the file is closed before any page loads.
    with open("lianjie.txt", "r") as link_file:
        entries = link_file.read().splitlines()

    opened = 0
    for entry in entries:
        if opened >= limit:
            break
        link = entry.strip()
        if not link:
            # Blank lines are not URLs; skip them instead of passing "" to the browser.
            continue
        print(link)
        # The stripped link is used so no trailing newline ends up in the URL.
        browser.get(link)
        opened += 1
    print(opened)
def main():
    """Entry point of the download script: delegates to ``read_load``."""
    read_load()
# Run the download step only when this file is executed as a script.
if __name__ == '__main__':
    main()