如何解决格式化 Python beautifulsoup 数据并删除重复的第一列值
我有以下代码段已经可用,但是我想通过删除一些重复的第一列数据来清理格式中的一些内容,使其更具可读性。
dropdown-toggle
电流输出:
from urllib.request import Request,urlopen
from bs4 import BeautifulSoup
import re,random,ctypes
import requests
from time import sleep
url = 'https://bscscan.com/tokentxns'
user_agent_list = [
"header = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0Gecko/20100101 Firefox/86.0'}","header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/83.0.4103.97 Safari/537.36'}","header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML,like Gecko) Version/13.1.1 Safari/605.1.15'}","header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/32.0.1667.0 Safari/537.36'}","header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/36.0.1985.67 Safari/537.36'}"
]
header = random.choice(user_agent_list)
pausesleep = float(random.randint(10000,30000)) / 10000 #orig
req = requests.get(url,header,timeout=10)
soup = BeautifulSoup(req.content,'html.parser')
rows = soup.findAll('table')[0].findAll('tr')
for row in rows[1:]:
tds = row.find_all('td')
txnhash = tds[1].text[0:]
age = tds[2].text[0:]
value = tds[7].text[0:]
token = tds[8].text[0:]
link = urljoin(url,tds[8].find('a')['href'])
print (str(txnhash) + " " + str(value) + " " + str(token))
需要改进:
0x70e16e1cbcd30d1c3a2abb03a3d3c43fc324aa794c45b10cd5ef1001e9af0915 899.885819768 TrusterCoin (TSC)
0x70e16e1cbcd30d1c3a2abb03a3d3c43fc324aa794c45b10cd5ef1001e9af0915 0.62679168 Wrapped BNB (WBNB)
0x52d862d3f920370d84039f2dccb40edc7343699310d3436b71738d4176997398 388,214,984,514.909719227 WoofCoin (WOOF)
0x52d862d3f920370d84039f2dccb40edc7343699310d3436b71738d4176997398 0.003 Wrapped BNB (WBNB)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 26.737674146727101117 Binance-Peg ... (BUSD)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 1.251364193609566793 Binance-Peg ... (ADA)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 0.03997685638568537 Binance-Peg ... (ADA)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 0.041171860015645402 Binance-Peg ... (ADA)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 0.089939749761843203 Wrapped BNB (WBNB)
解决方法
试试这个:
from urllib.request import Request,urlopen,urljoin
from bs4 import BeautifulSoup
import re,random,ctypes
import requests
from time import sleep
url = 'https://bscscan.com/tokentxns'
user_agent_list = [
"header = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0Gecko/20100101 Firefox/86.0'}","header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/83.0.4103.97 Safari/537.36'}","header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML,like Gecko) Version/13.1.1 Safari/605.1.15'}","header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/32.0.1667.0 Safari/537.36'}","header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/36.0.1985.67 Safari/537.36'}"
]
header = random.choice(user_agent_list)
pausesleep = float(random.randint(10000,30000)) / 10000
req = requests.get(url,header,timeout=10)
soup = BeautifulSoup(req.content,'html.parser')
rows = soup.findAll('table')[0].findAll('tr')
ne=[]
for row in rows[1:]:
tds = row.find_all('td')
txnhash = tds[1].text[0:]
age = tds[2].text[0:]
value = tds[7].text[0:]
token = tds[8].text[0:]
link = urljoin(url,tds[8].find('a')['href'])
if str(txnhash) not in ne:
ne.append(str(txnhash))
print (str(txnhash),end=" ")
else:# If you want those tab also then. Otherwise remove else
print("\t\t\t",end=" ")
print(str(value) + " " + str(token))
我们在 list of txnhash
中创建 ne
,然后每次检查新的 txnhash
是否在该列表中。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。