如何解决:如何在循环中追加来自新页面的行
# Build one search-results URL per entry in `pages`.
urls = []
for page in pages:
    page = "https://www.golddist.com/index.PHP?skip="+str(page)+"&m=search&instock=1"
    urls.append(page)
#print(urls)

# Visit every URL and read four columns out of the results table.
# Each XPath selects cells from every table row after the first one.
for pgur in urls:
    browser.get(pgur)
    time.sleep(1)  # fixed 1 s pause after navigation before querying the page

    # Column 1 -> Identifier
    manufs = browser.find_elements_by_xpath('/html/body/div/div[2]/div[2]/div/form/div[1]/table/tbody/tr[position()>1]/td[1]/a/b/font/span')
    Identifier = []
    for part1 in manufs:
        manuf = part1.text
        Identifier.append(manuf)
    print(Identifier)

    # Column 2 -> Title
    heads = browser.find_elements_by_xpath('/html/body/div/div[2]/div[2]/div/form/div[1]/table/tbody/tr[position()>1]/td[2]/span/a/b')
    Title = []
    for part2 in heads:
        head = part2.text
        Title.append(head)
    #print(Title)

    # Column 3 -> Note
    stocks = browser.find_elements_by_xpath('/html/body/div/div[2]/div[2]/div/form/div[1]/table/tbody/tr[position()>1]/td[3]/center/b/font/font')
    Note = []
    for part3 in stocks:
        stock = part3.text
        Note.append(stock)
    #print(Note)

    # Column 5 -> Price
    pri = browser.find_elements_by_xpath('/html/body/div/div[2]/div[2]/div/form/div[1]/table/tbody/tr[position()>1]/td[5]/center/b/font')
    Price = []
    for part4 in pri:
        pric = part4.text
        Price.append(pric)
    #print(Price)

    data = {'Identifier': Identifier,'Title': Title,'Price': Price,'Note': Note}
    df = pd.DataFrame(data)
    # NOTE: this runs once per page and rewrites GD1.csv from scratch each
    # time, so the file only ever holds the rows of the page just scraped.
    # This is the behaviour the question is asking about.
    df.to_csv('GD1.csv',index = False)
如何从所有页面追加数据而不是不断替换新页面中的行? 我想从所有页面中抓取产品数据。但我的脚本只是用从新页面抓取的数据刷新 CSV 文件。
解决方法
使用列表在每次迭代时存储数据,最后将所有结果连接到单个数据框中:
ldf = []  # <- create a list to store dataframes for each url
for pgur in urls:
    browser.get(pgur)
    time.sleep(1)
    ...  # <- same per-page scraping code as in the question
    data = {'Identifier': Identifier,'Title': Title,'Price': Price,'Note': Note}
    df = pd.DataFrame(data)
    ldf.append(df)  # <- append your dataframe to the list
    # Rewrite the CSV with every page collected so far, so the file always
    # contains all rows scraped up to this point rather than only the last page.
    pd.concat(ldf).to_csv('GD1.csv',index=False)  # <- overwrite csv with all data

df = pd.concat(ldf)  # <- concatenate all dataframes
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。