Python html.parser 模块，HTMLParser.feed() 实例源码
我们从Python开源项目中，提取了以下17个代码示例，用于说明如何使用html.parser.HTMLParser.feed()。
def _get_eol_list(self) -> typing.List[str]:
    """Scrape the FreeBSD website and return a list of EOL releases.

    Requests ``self.eol_url`` and feeds the decoded response body to an
    ``EOLParser``.

    Returns:
        ``parser.eol_releases`` after parsing, or an empty list when the
        response status code is not 200.
    """
    request = urllib.request.Request(
        self.eol_url,
        headers={
            "Accept-Charset": "utf-8"
        }
    )
    with urllib.request.urlopen(request) as response:  # nosec: B310
        if response.getcode() != 200:  # noqa: T484
            # The error object is only constructed here, never raised.
            # NOTE(review): presumably constructing it logs the warning
            # through the given logger - confirm.
            iocage.lib.errors.distributionEOLWarningDownloadFailed(
                logger=self.logger,
                level="warning"
            )
            return []
        parser = EOLParser()
        # Undecodable bytes are dropped instead of raising.
        data = response.read().decode("utf-8", "ignore")
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it (assumes EOLParser derives from HTMLParser).
        parser.feed(data)
        parser.close()
        return parser.eol_releases
def _run_check(self, source, expected_events, collector=None):
    """Feed ``source`` to a collector and compare its events to expectations.

    Args:
        source: Iterable of chunks; each one is passed to ``feed()`` in order.
        expected_events: Value the collected events must equal.
        collector: Parser to use; when ``None`` one is obtained from
            ``self.get_collector()``.

    Calls ``self.fail()`` with both event lists pretty-printed when the
    collected events differ from ``expected_events``.
    """
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(s)
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events\n"
                  "Expected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def _parse_error(self, source):
    """Assert that parsing ``source`` raises ``html.parser.HTMLParseError``.

    Args:
        source: Text handed to a fresh collector from ``self.get_collector()``.
    """
    def parse(source=source):
        # Feed and close a fresh collector; the outer assertion expects this
        # to raise.
        parser = self.get_collector()
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(source)
        parser.close()
    self.assertRaises(html.parser.HTMLParseError, parse)
def _run_check(self, collector=None):
    """Feed ``source`` to a collector and compare its events to expectations.

    Args:
        collector: Parser to use; when ``None`` one is obtained from
            ``self.get_collector()``.

    Calls ``self.fail()`` with both event lists pretty-printed when the
    collected events differ from ``expected_events``.
    """
    # NOTE(review): ``source`` and ``expected_events`` are not parameters of
    # this function; they must resolve from an outer scope or the body raises
    # NameError. The signature was probably meant to be
    # (self, source, expected_events, collector=None) - confirm.
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(s)
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events\n"
                  "Expected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def parse(html):
    """Parse the HTML text ``html`` and return the root of the tree.

    Args:
        html: HTML text fed to a new ``_MyHTMLParser``.

    Returns:
        The parser's ``root`` attribute after feeding.
    """
    parser = _MyHTMLParser()
    # html.parser.HTMLParser spells this method feed(); "Feed" does not exist
    # on it (assumes _MyHTMLParser derives from HTMLParser).
    parser.feed(html)
    return parser.root
def main():
    """Parse ``sheet001.htm`` with a ``ToolHireParser`` and print its dates."""
    # The context manager closes the file handle once it has been read.
    with open("sheet001.htm") as sheet:
        htm = sheet.read()
    parser = ToolHireParser()
    # html.parser.HTMLParser spells this method feed(); "Feed" does not exist
    # on it (assumes ToolHireParser derives from HTMLParser).
    parser.feed(htm)
    print(parser.dates)
def _run_check(self, collector=None):
    """Feed ``source`` to a collector and compare its events to expectations.

    Args:
        collector: Parser to use; when ``None`` one is obtained from
            ``self.get_collector()``.

    Calls ``self.fail()`` with the source and both event lists when the
    collected events differ from ``expected_events``.
    """
    # NOTE(review): ``source`` and ``expected_events`` are not parameters of
    # this function; they must resolve from an outer scope or the body raises
    # NameError. The signature was probably meant to be
    # (self, source, expected_events, collector=None) - confirm.
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(s)
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events" +
                  "\nSource:\n" + repr(source) +
                  "\nExpected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def _parse_error(self, source):
    """Run a parse of ``source`` that is expected to fail.

    The parse runs inside nested ``assertRaises(HTMLParseError)`` and
    ``assertWarns(DeprecationWarning)`` contexts.

    Args:
        source: Text handed to a fresh collector from ``self.get_collector()``.
    """
    def parse(source=source):
        # Feed and close a fresh collector.
        parser = self.get_collector()
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(source)
        parser.close()
    with self.assertRaises(html.parser.HTMLParseError):
        with self.assertWarns(DeprecationWarning):
            parse()
def test_convert_charrefs_dropped_text(self):
    """Check that trailing text is reported without calling ``close()``."""
    # #23144: make sure that all the events are triggered when
    # convert_charrefs is True, even if we don't call .close()
    parser = EventCollector(convert_charrefs=True)
    # before the fix, bar & baz was missing
    # html.parser.HTMLParser spells this method feed(); "Feed" does not exist
    # on it.
    parser.feed("foo <a>link</a> bar & baz")
    self.assertEqual(
        parser.get_events(),
        # The last entry is a ('data', ...) event like the others; the bare
        # string with a stray ")" was a syntax error.
        [('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
         ('endtag', 'a'), ('data', ' bar & baz')]
    )
def _run_check(self, collector=None):
    """Feed ``source`` to a collector and compare its events to expectations.

    Args:
        collector: Parser to use; when ``None`` one is obtained from
            ``self.get_collector()``.

    Calls ``self.fail()`` with the source and both event lists when the
    collected events differ from ``expected_events``.
    """
    # NOTE(review): ``source`` and ``expected_events`` are not parameters of
    # this function; they must resolve from an outer scope or the body raises
    # NameError. The signature was probably meant to be
    # (self, source, expected_events, collector=None) - confirm.
    if collector is None:
        collector = self.get_collector()
    parser = collector
    for s in source:
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(s)
    parser.close()
    events = parser.get_events()
    if events != expected_events:
        self.fail("received events did not match expected events" +
                  "\nSource:\n" + repr(source) +
                  "\nExpected:\n" + pprint.pformat(expected_events) +
                  "\nReceived:\n" + pprint.pformat(events))
def _parse_error(self, source):
    """Run a parse of ``source`` that is expected to fail.

    The parse runs inside nested ``assertRaises(HTMLParseError)`` and
    ``assertWarns(DeprecationWarning)`` contexts.

    Args:
        source: Text handed to a fresh collector from ``self.get_collector()``.
    """
    def parse(source=source):
        # Feed and close a fresh collector.
        parser = self.get_collector()
        # html.parser.HTMLParser spells this method feed(); "Feed" does not
        # exist on it.
        parser.feed(source)
        parser.close()
    with self.assertRaises(html.parser.HTMLParseError):
        with self.assertWarns(DeprecationWarning):
            parse()
def find_scripts(site):
    """Feed ``site`` to a ``ScriptParser`` and return what it collected.

    Args:
        site: Markup text passed to the parser.

    Returns:
        The parser's ``scripts`` attribute after feeding.
    """
    parser = ScriptParser()
    # html.parser.HTMLParser spells this method feed(); "Feed" does not exist
    # on it (assumes ScriptParser derives from HTMLParser).
    parser.feed(site)
    return parser.scripts
def execute(self, context):
    """Download and unpack the platform archives listed on blender.org.

    Scrapes the release index page for links whose ``href`` starts with
    ``blender``, downloads each one into the configured library folder
    (reusing files that already exist), unpacks it, and finally runs the
    ``publish_auto_platforms`` scene operator.

    Args:
        context: Context object; ``context.scene.ge_publish_settings`` supplies
            ``lib_path``.

    Returns:
        The set ``{'FINISHED'}``.
    """
    import html.parser
    import urllib.request

    remote_platforms = []
    ps = context.scene.ge_publish_settings

    # create lib folder if not already available
    lib_path = bpy.path.abspath(ps.lib_path)
    if not os.path.exists(lib_path):
        os.makedirs(lib_path)

    print("Retrieving list of platforms from blender.org...", end=" ", flush=True)

    class AnchorParser(html.parser.HTMLParser):
        """Collect ``href`` values of anchors that start with ``blender``."""

        def handle_starttag(self, tag, attrs):
            if tag == 'a':
                for key, value in attrs:
                    # HTMLParser reports a valueless attribute as None, so
                    # guard before calling startswith().
                    if key == 'href' and value and value.startswith('blender'):
                        remote_platforms.append(value)

    url = 'http://download.blender.org/release/Blender' + bpy.app.version_string.split()[0]
    parser = AnchorParser()
    # Close the HTTP response as soon as the page has been read.
    with urllib.request.urlopen(url) as response:
        data = response.read()
    # Decode the bytes; str(data) would feed the "b'...'" repr to the parser.
    # HTMLParser's method is lowercase feed(); "Feed" does not exist.
    parser.feed(data.decode("utf-8", errors="replace"))
    print("done", flush=True)

    print("Downloading files (this will take a while depending on your internet connection speed).", flush=True)
    for name in remote_platforms:
        src = '/'.join((url, name))
        dst = os.path.join(lib_path, name)
        # Unpack target: the download path with archive extensions removed.
        dst_dir = '.'.join([part for part in dst.split('.') if part not in {'zip', 'tar', 'bz2'}])
        if not os.path.exists(dst) and not os.path.exists(dst.split('.')[0]):
            print("Downloading " + src + "...", flush=True)
            urllib.request.urlretrieve(src, dst)
            print("done", flush=True)
        else:
            print("Reusing existing file: " + dst, flush=True)

        print("Unpacking " + dst + "...", flush=True)
        # Remove any earlier unpack so stale files do not linger.
        if os.path.exists(dst_dir):
            shutil.rmtree(dst_dir)
        shutil.unpack_archive(dst, dst_dir)
        print("done", flush=True)

    print("Creating platform from libs...", flush=True)
    bpy.ops.scene.publish_auto_platforms()
    return {'FINISHED'}
def serialize(result):
    """For a given Met result, map that to our database.

    Builds a ``models.Image`` from the first entry of ``result['ImageInfo']``
    whose ``'Primarydisplay'`` value is truthy, fills in creator, license and
    identifier fields, and saves it.

    Args:
        result: Mapping with ``'ImageInfo'``, ``'Tombstone'`` and
            ``'CollectionObject'`` entries.

    Returns:
        The image object, or ``None`` when no image URL was found. The image
        is returned even if saving raised ``IntegrityError``.
    """
    imageinfos = result['ImageInfo']
    thumbnail = None
    url = None
    for info in imageinfos:
        if info['Primarydisplay']:
            # Use this one
            thumbnail = ENDPOINT_BASE_IMAGE_URL + info['Thumbnail']
            url = ENDPOINT_BASE_IMAGE_URL + info['LargeWebsite']
            break
    if not url:
        log.warning("Did not get an image URL for %s", result)
        return

    image = models.Image(url=url)
    image.provider = PROVIDER_NAME
    image.source = SOURCE_NAME

    # Creator might be a few fields
    tombstone = result['Tombstone']
    creator_names = []
    for t in tombstone:
        if t['Name'] in CREATOR_LABELS:
            val = t['Value']
            parser = CreatorParser()
            # html.parser.HTMLParser spells this method feed(); "Feed" does
            # not exist on it (assumes CreatorParser derives from HTMLParser).
            parser.feed(val)
            creator_names.append(" ".join(parser.out))
    if creator_names:
        image.creator = ",".join(creator_names)

    image.thumbnail = thumbnail
    image.license = "cc0"
    image.license_version = '1.0'
    image.foreign_identifier = result['CollectionObject']['CRDID']
    image.foreign_landing_url = FOREIGN_LANDING_BASE_URL + str(image.foreign_identifier)
    image.title = result['CollectionObject']['Title']
    image.identifier = signals.create_identifier(image.url)
    # Django's helper is timezone.now(); "Now" does not exist
    # (assumes ``timezone`` is django.utils.timezone).
    image.last_synced_with_source = timezone.now()
    try:
        image.save()
    except IntegrityError as e:
        # Best effort: log the failure and still return the image.
        log.warning(e)
    else:
        log.info("Adding image %s-%s (%s) identifier %s", image.title, image.creator, image.foreign_identifier, image.identifier)
    return image
def execute(self, flush=True):
    """Run the ``publish_auto_platforms`` scene operator.

    Args:
        flush: Accepted but not used anywhere in this body.

    Returns:
        The set ``{'FINISHED'}``.
    """
    bpy.ops.scene.publish_auto_platforms()
    return {'FINISHED'}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。