学习小记

时间:2020-01-09 20:53:06   收藏:0   阅读:93
# -*- coding: utf-8 -*-
#@Time :2020/1/9 10:32
#@Author :LI
import requests
import json
from lxml import etree
def getOnePage(n, timeout=10):
    """Fetch one page of the Maoyan board and return its HTML as text.

    Args:
        n: Page index; the request is sent with ``offset = n * 10``.
        timeout: Seconds ``requests`` waits on the server before raising
            a timeout error instead of blocking indefinitely.

    Returns:
        The response body as text (``Response.text``). The HTTP status
        code is not checked, so whatever body comes back is returned.
    """
    url = f'https://maoyan.com/board/4?offset={n*10}'
    # A browser-like User-Agent header is sent with the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"}
    r = requests.get(url=url, headers=headers, timeout=timeout)
    return r.text

def parse(text):
    """Yield one ``{'name': ..., 'releasetime': ...}`` dict per movie in *text*.

    Names are read from the ``title`` attribute of the link inside
    ``p.name`` under ``div.movie-item-info``; release times are the text
    nodes of ``p.releasetime``. The two result lists are paired
    positionally with ``zip``, so pairing stops at the shorter list.

    Args:
        text: HTML source of one board page.

    Yields:
        A fresh dict for every (name, releasetime) pair, so callers may
        collect the results without the entries aliasing each other.
    """
    html = etree.HTML(text)
    if html is None:
        # NOTE(review): etree.HTML appears to return None when it cannot
        # build a tree (e.g. empty input); yield nothing in that case.
        return
    names = html.xpath('//div[@class="movie-item-info"]/p[@class="name"]/a/@title')
    releasetimes = html.xpath('//p[@class="releasetime"]/text()')
    for name, releasetime in zip(names, releasetimes):
        # Build a new dict each time; reusing one dict across yields would
        # make every collected item show the values of the last movie.
        yield {'name': name, 'releasetime': releasetime}
def saveData(data, path='movie.json'):
    """Append *data* to a file as a single line of JSON.

    Args:
        data: A JSON-serializable object (dict, list, ...).
        path: File to append to; defaults to ``movie.json`` in the
            current working directory.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and
    the file is opened as UTF-8.
    """
    # Serialize before opening the file so that a serialization error
    # does not leave behind a newly created, empty output file.
    line = json.dumps(data, ensure_ascii=False) + '\n'
    with open(path, 'a', encoding='utf-8') as f:
        f.write(line)

def run():
    """Crawl board pages 0 through 9, printing and saving each parsed item."""
    for page_index in range(10):
        page_html = getOnePage(page_index)
        # parse() is a generator, so items are handled as they are produced.
        for movie in parse(page_html):
            print(movie)
            saveData(movie)

# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    run()
评论(0)
© 2014 mamicode.com 版权所有 京ICP备13008772号-2  联系我们:gaon5@hotmail.com
迷上了代码!