# NOTE(review): these statements had been collapsed onto two physical lines,
# which fused keywords ("inrange", "withopen") and let the inline comments
# swallow the code that followed them. They are restored to one statement per
# line. The enclosing scope is not visible here -- if they belong inside
# main(), re-indent everything except the final main() call.

# Scroll to the bottom of the page repeatedly so that the group-member data
# gets loaded; one scroll per 21 members (presumably the page's batch size --
# TODO confirm), pausing a second after each scroll.
for _ in range(1, ((int(number) + 1) // 21) + 1):
    driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")
    time.sleep(1)

# Grab the rendered page and keep a copy of it on disk.
content = driver.page_source
website_name = url + '.html'
with open(website_name, 'w', encoding='utf-8') as f:
    f.write(content)

book = xlwt.Workbook(encoding='utf-8')  # new xls workbook
sheet = book.add_sheet('sheet', cell_overwrite_ok=True)  # new worksheet
book_name = url + '.xls'  # file name of the Excel workbook

# Re-open the HTML file that was just saved; the context manager guarantees
# the handle is closed (it used to be left open).
with open(website_name, 'rb') as html_file:
    html = html_file.read()
soup = BeautifulSoup(html, "lxml")

# Number of columns written per row (header and member data alike).
column_count = 7

# Header row: keyword is looked up with the stringified column index.
for column in range(column_count):
    sheet.write(0, column, keyword[str(column)])

# Path of (tag, find_all filters) leading from the document root down to the
# member rows. Descending one level at a time visits exactly the same <tr>
# elements, in the same order, as nesting one for-loop per level would.
# 'group-memeber' is spelled as the scraper has always matched it; do not
# "correct" it without checking the page markup.
member_row_path = (
    ('div', {'class_': 'body'}),
    ('dl', {}),
    ('dd', {}),
    ('div', {'class_': 'group-memeber'}),
    ('table', {}),
    ('tbody', {'class_': 'list'}),
    ('tr', {}),
)
member_rows = [soup]
for tag_name, filters in member_row_path:
    member_rows = [
        child
        for node in member_rows
        for child in node.find_all(tag_name, **filters)
    ]

# Write one spreadsheet row per member, starting at the current `line`.
for tr in member_rows:
    # Collect the row's text fragments once instead of once per cell.
    cells = list(tr.stripped_strings)
    # Slicing keeps a short row from raising IndexError and aborting the
    # export before the workbook is saved; missing cells are left blank.
    for column, value in enumerate(cells[:column_count]):
        sheet.write(line, column, value)
    line = line + 1

book.save(book_name)  # save the workbook
print('已将群成员数据保存在本地目录下!!!!!!!')

main()