MemoryBuffer 2017-08-30
一个简单的网络爬虫
# -*- coding: UTF-8 -*-
import requests
from lxml import etree
import json

# Absolute XPath of the <strong> elements this script was written to extract
# from the demo page requested in the __main__ guard below.
DEFAULT_XPATH = (
    "/html/body/div[1]/div[2]/div[1]/div/div[1]/div/div/div[2]/div[1]"
    "/p[2]/strong"
)


def spider(url, xpath=DEFAULT_XPATH, timeout=10):
    """Fetch *url* and print the text of every element matched by *xpath*.

    Args:
        url: Address of the HTML page to download.
        xpath: XPath expression selecting the elements whose text is
            printed. Defaults to the path that was originally hard-coded.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Raises:
        requests.RequestException: On network failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of silently parsing an error page.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    if tree is None:
        # Defensive: nothing was parsed (e.g. an empty body), so there is
        # nothing to query.
        return

    for element in tree.xpath(xpath):
        # .text is None when the element has no leading text node; skip it
        # rather than printing the literal string "None".
        if element.text is not None:
            # Parenthesised single-argument form runs on Python 2 and 3.
            print(element.text)


if __name__ == '__main__':
    spider("http://www.cnblogs.com/wangyayun/p/6557851.html")