李小粹

【求助】python的for语句没有被执行

[color=#808080][i]这是源代码[/i][/color][color=#808080][i]
[/i][/color][color=#808080][i]# coding=utf-8
[/i][/color][color=#000080][b]import [/b][/color]re
[color=#000080][b]import [/b][/color]sys
[color=#000080][b]import [/b][/color]time

[color=#000080][b]import [/b][/color]requests
[color=#000080][b]from [/b][/color]lxml [color=#000080][b]import [/b][/color]etree

[color=#000080][b]import [/b][/color]importlib

importlib.reload(sys)

[color=#808080][i]# 定义一个爬虫
[/i][/color][color=#000080][b]class [/b][/color]spider([color=#000080]object[/color]):
[color=#000080][b]def [/b][/color][color=#b200b2]__init__[/color]([color=#94558d]self[/color]):
[color=#000080]print[/color](...)

[color=#808080][i]# getsource用来获取网页源代码

[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]def [/b][/color]getsource([color=#94558d]self[/color], url):
html = requests.get(url)
[color=#000080][b]return [/b][/color]html.text

[color=#808080][i]# changepage用来生产不同页数的链接

[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]def [/b][/color]changepage([color=#94558d]self[/color], url, total_page):
[color=#000080][b]if [/b][/color]re.search([color=#008080][b]'index_(\d+)'[/b][/color], url, re.S):
now_page = [color=#000080]int[/color](re.search([color=#008080][b]'index_(\d+)'[/b][/color], url, re.S).group([color=#0000ff]1[/color])) [color=#808080][i]# 可修改
[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]else[/b][/color]:
now_page = [color=#0000ff]0
[/color][color=#0000ff] [/color]page_group = []
[color=#000080][b]for [/b][/color]i [color=#000080][b]in [/b][/color][color=#000080]range[/color](now_page, total_page + [color=#0000ff]1[/color]):
link = re.sub([color=#008080][b]'index_\d+'[/b][/color], [color=#008080][b]'index_%s' [/b][/color]% i, url, re.S) [color=#808080][i]# 可修改
[/i][/color][color=#808080][i] [/i][/color]page_group.append(link)
[color=#000080][b]return [/b][/color]page_group

[color=#808080][i]# getpic用来爬取一个网页图片

[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]def [/b][/color]getpic([color=#94558d]self[/color], source):
selector = etree.HTML(source)
pic_url = selector.xpath([color=#008080][b]'//ul[@class="ali"]/li/div/a/img/@src'[/b][/color]) [color=#808080][i]# 可修改
[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]return [/b][/color]pic_url

[color=#808080][i]# savepic用来保存结果到pic文件夹中

[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]def [/b][/color]savepic([color=#94558d]self[/color], pic_url):
picname = re.findall([color=#008080][b]'(\d+)'[/b][/color], link, re.S) [color=#808080][i]# 可修改
[/i][/color][color=#808080][i] [/i][/color]picnamestr = [color=#008080][b]''[/b][/color].join(picname)
i = [color=#0000ff]0
[/color][color=#0000ff] [/color][color=#808080][i]#------------------------------------------下面这段没有执行!!!-----------------------------------------
[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]for [/b][/color]each [color=#000080][b]in [/b][/color]pic_url:
[color=#000080]print[/color]([color=#008080][b]'now downloading:{}'[/b][/color].format(each))
pic = requests.get(each)
fp = [color=#000080]open[/color]([color=#008080][b]'pic[/b][/color][color=#000080][b]\\[/b][/color][color=#008080][b]' [/b][/color]+ picnamestr + [color=#008080][b]'-' [/b][/color]+ [color=#000080]str[/color](i) + [color=#008080][b]'.jpg'[/b][/color], [color=#008080][b]'wb'[/b][/color])
fp.write(pic.content)
fp.close()
i += [color=#0000ff]1
[/color][color=#0000ff]
[/color][color=#0000ff] [/color][color=#808080][i]# ppic集合类的方法
[/i][/color][color=#808080][i]
[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]def [/b][/color]ppic([color=#94558d]self[/color], link):
[color=#000080]print[/color](...)
html = picspider.getsource(link)
pic_url = picspider.getpic(html)
picspider.savepic(pic_url)

[color=#000080][b]if [/b][/color]__name__ == [color=#008080][b]'__main__'[/b][/color]:
start = time.time()
url = [color=#008080][b]'http://www.baidu.com/' [/b][/color][color=#808080][i]# 可修改
[/i][/color][color=#808080][i] [/i][/color]picspider = spider()
all_links = picspider.changepage(url, [color=#0000ff]3[/color]) [color=#808080][i]# 可修改
[/i][/color][color=#808080][i] [/i][/color][color=#000080][b]for [/b][/color]link [color=#000080][b]in [/b][/color]all_links:
picspider.ppic(link)
end = time.time()
[color=#000080]print[/color]([color=#008080][b]'耗时:{}'[/b][/color].format(start - end))

查看回复
0%
粤ICP备18082987号-1 浙公网安备 33010902001746号
友情链接:
喵宅苑
喵空间社区程序
络合兔
技术宅
腕能新趣
小五四博客
莉可POI
Mithril.js
枫の主题社
Project1
午后少年
机智库
七濑胡桃
xiuno
幻想の博客