# coding:utf-8 import time import urllib2 import random from bs4 import BeautifulSoup
def filter(tag):
    """Return True for ``<img>`` tags whose first CSS class is ``lazy``.

    Written as a predicate for ``BeautifulSoup.find_all``: it is called once
    per tag and must return a truthy value for the tags to keep.

    NOTE: the name shadows the ``filter`` builtin; it is kept because the
    scraping loop in this file passes it to ``find_all`` by this name.

    :param tag: an object exposing ``name``, ``has_attr(key)`` and
        ``tag[key]`` (e.g. a ``bs4.element.Tag``).
    :return: ``True`` if the tag matches, ``False`` otherwise.
    """
    if tag.name != 'img' or not tag.has_attr('class'):
        return False
    classes = tag['class']
    # The original wrote ``cmp(classes[0], 'lazy' == 0)``, i.e. it compared
    # the class with ``False`` and therefore accepted every classed <img>.
    # Compare against 'lazy' directly; the ``bool(classes)`` guard avoids an
    # IndexError when the class list is empty.
    return bool(classes) and classes[0] == 'lazy'
# Scrape listing pages 135..1499 of woyaogexing.com and append one line per
# matched image to the output file, in the form "<src>,<random 0 or 1>".
# The ``with`` block guarantees the file is flushed and closed even when a
# request or parse step raises part-way through the run.
with open("./20160717/avatar.txt", "a") as outfile:
    for i in range(135, 1500):
        print(i)  # progress: page number currently being fetched
        url = 'http://www.woyaogexing.com/touxiang/index_' + str(i) + '.html'

        # urllib2 responses are not context managers on Python 2, so the
        # connection is released explicitly.
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(data, "lxml")
        # ``filter`` is the tag predicate defined in this file, not the builtin.
        imgs = soup.find_all(filter)

        for img in imgs:
            # Skip tags without a src attribute instead of aborting the
            # whole crawl with a KeyError.
            if not img.has_attr('src'):
                continue
            outfile.write(img['src'] + ',' + str(random.randint(0, 1)) + '\n')

        # Pause between page requests to avoid hammering the server.
        time.sleep(0.5)
def filter(tag):
    """Return True for ``<ul>`` tags whose first CSS class is ``list``.

    Per the original author's note, this selects the tag that contains the
    nicknames. It is written as a predicate for ``BeautifulSoup.find_all``.

    NOTE: the name shadows the ``filter`` builtin; it is kept because the
    scraping loop in this file passes it to ``find_all`` by this name.

    :param tag: an object exposing ``name``, ``has_attr(key)`` and
        ``tag[key]`` (e.g. a ``bs4.element.Tag``).
    :return: ``True`` if the tag matches, ``False`` otherwise.
    """
    if tag.name != "ul" or not tag.has_attr("class"):
        return False
    classes = tag['class']
    # Plain ``==`` replaces the Python-2-only ``cmp``; the ``bool(classes)``
    # guard avoids an IndexError when the class list is empty.
    return bool(classes) and classes[0] == 'list'
# Output file, opened in append mode. It is not closed anywhere in the
# visible part of this script.
outfile = open("./name.txt", "a")  # output file
# Fetch listing pages 55..144 one by one.
for i in range(55, 145):
    print i  # echo the page number currently being fetched
    url = 'http://www.oicq88.com/nvsheng/'+str(i)+'.htm'
    response = urllib2.urlopen(url)  # the "/nvsheng/" path segment can be replaced with others
    data = response.read()
    soup = BeautifulSoup(data, "lxml")
    # `filter` is the tag predicate defined in this file, not the builtin.
    ul = soup.find_all(filter)
    # Only the first match is used; ul[0] raises IndexError if nothing matched.
    # The matched element is serialised and re-parsed as its own document.
    ulsoup = BeautifulSoup(str(ul[0]), "lxml")
    lis = ulsoup.find_all("li")
    # NOTE(review): `lis` and `outfile` are not used in the visible part of
    # this loop; presumably the body continues below -- confirm.