Python mistune 模块,markdown() 实例源码
我们从Python开源项目中,提取了以下18个代码示例,用于说明如何使用mistune.markdown()。
def markdown_preprocess(markdown_content):
    """Render markdown to HTML and strip it down to plain prose.

    Links to bare URLs, images and code blocks are removed entirely;
    descriptive links keep only their visible text. Returns the
    remaining text content of the rendered document.
    """
    rendered_html = mistune.markdown(markdown_content, escape=False)
    soup = BeautifulSoup(rendered_html, "html.parser")
    # Anchors whose text is itself a URL carry no prose -- drop them;
    # otherwise keep the anchor's visible text in place of the tag.
    for anchor in soup.findAll('a'):
        if anchor.text.startswith('http'):
            anchor.extract()
        else:
            anchor.replaceWithChildren()
    # Images and code blocks contribute nothing to the text -- remove both.
    for tag_name in ('img', 'code'):
        for tag in soup.findAll(tag_name):
            tag.extract()
    return soup.text
def create_issue(self, request, group, form_data, **kwargs):
    """
    Creates the issue on the remote service and returns an issue ID.
    """
    instance = self.get_option('instance', group.project)
    # Fall back to the plugin's configured default project when the
    # form does not name one.
    project = form_data.get('project')
    if not project:
        project = self.get_option('default_project', group.project)
    client = self.get_client(request.user)
    issue_title = form_data['title']
    issue_body = form_data['description']
    issue_link = absolute_uri(group.get_absolute_url())
    try:
        created_item = client.create_work_item(
            instance=instance,
            project=project,
            title=issue_title,
            comment=markdown(issue_body),
            link=issue_link,
        )
    except Exception as e:
        # Surface remote-service failures through the plugin error path,
        # attaching the identity used for the request.
        self.raise_error(e, identity=client.auth)
    return {
        'id': created_item['id'],
        'url': created_item['_links']['html']['href'],
        'title': issue_title,
    }
def link_issue(self, request, group, form_data, **kwargs):
    """Link a Sentry group to an existing remote work item.

    When the form carries a comment, the work item is updated with that
    comment and a back-link to the group; otherwise the item is only
    fetched. Returns an id/url/title summary of the work item.

    NOTE(review): ``request``, ``group`` and ``form_data`` were free
    (undefined) names in the original body; they are exposed here as
    parameters to match ``create_issue``'s signature — confirm against
    the plugin base class.
    """
    client = self.get_client(request.user)
    instance = self.get_option('instance', group.project)
    if form_data.get('comment'):
        try:
            work_item = client.update_work_item(
                instance=instance,
                id=form_data['item_id'],
                link=absolute_uri(group.get_absolute_url()),
                # The enclosing branch already guarantees the comment is
                # present and truthy, so the redundant conditional that
                # re-checked form_data.get('comment') has been removed.
                comment=markdown(form_data['comment']),
            )
        except Exception as e:
            self.raise_error(e, identity=client.auth)
    else:
        try:
            # NOTE(review): the original passed identity=client.auth to
            # get_work_item; preserved as-is — verify against the client API.
            work_item = client.get_work_item(
                instance=instance, identity=client.auth)
        except Exception as e:
            # Fix: this ``try`` had no handler (a syntax error in the
            # original); handle failures like the update branch above.
            self.raise_error(e, identity=client.auth)
    return {
        'id': work_item['id'],
        'url': work_item['_links']['html']['href'],
        'title': work_item['fields']['System.Title'],
    }
def description(self):
    """Extract description from a release."""
    release_body = self.release.get('body')
    if release_body:
        # Prefer the release notes, rendered from markdown.
        return markdown(release_body)
    repo_description = self.repository.get('description')
    if repo_description:
        return repo_description
    return 'No description provided.'
def get_statement():
    """Return the privacy statement rendered as HTML.

    Prefers a site-local ``PRIVACY_STATEMENT.md``; falls back to the
    built-in default statement when no such file exists.
    """
    try:
        with open('PRIVACY_STATEMENT.md', 'r') as statement_file:
            print('Using custom privacy statement')
            custom_source = statement_file.read()
            return mistune.markdown(custom_source, escape=False, hard_wrap=True)
    except FileNotFoundError:
        print('Using default privacy statement.')
        return DEFAULT_STATEMENT
def extract_markdown(text):
    """Pull a short descriptive snippet out of raw markdown text."""
    cleaned = readme_clean(text)
    # First attempt: text sandwiched between runs of '#' in the raw source.
    heading_re = re.compile(r'\#+(.+?)\#+', flags=re.DOTALL)
    headings = re.findall(heading_re, cleaned)
    if len(headings) > 10:
        return headings[0].replace('\n', ' ')
    # Second attempt: render to HTML and take the first paragraph-ish match.
    html = mistune.markdown(cleaned)
    paragraph_re = re.compile(r'<p(.+?)/p>+', flags=re.DOTALL)
    paragraphs = re.findall(paragraph_re, html)
    if paragraphs:
        return paragraphs[0].replace('\n', ' ')
    # Last resort: a cleaned, possibly truncated slice of the rendered HTML.
    if len(html) > MAX_STRING_LENGTH:
        return clean(html[:MAX_STRING_LENGTH] + ' ...')
    return clean(html)
def write_entry(filename):
    '''
    entry text generator

    * dump given file into entry format by parsing file as markdown
    * return as list of strings

    ``filename`` is resolved relative to ``config.USER_DATA``; its date
    components come from ``parse_date`` (presumably (year, month, day)
    strings -- confirm against parse_date).
    '''
    date = parse_date(filename)
    # Anchor used both for the in-page <a name> target and the heading link.
    anchor = date[0] + date[1] + date[2]
    entry = [
        "\t\t<p><a name=\"" + anchor + "\"></a><br /><br /></p>\n",
        "\t\t<div class=\"entry\">\n",
        "\t\t\t<h5><a href=\"#" + anchor + "\">" + date[2] + "</a> " + chatter.month(date[1]) + " " + date[0] + "</h5>\n",
    ]
    # Fix: read via a context manager so the handle is closed even if
    # reading raises; the manual open/append-loop/close is unnecessary.
    with open(os.path.join(config.USER_DATA, filename), "r") as rawfile:
        raw = rawfile.read()
    entry.append("\t\t\t" + mistune.markdown(raw, hard_wrap=False))
    entry.append("\t\t\t<p style=\"font-size:.6em; font-color:#808080; text-align: right;\"><a href=\"" + "".join(date) + ".html\">permalink</a></p>\n")
    entry.append("\n\t\t</div>\n")
    return entry
def _create_jinja_environment(site_root, link_ext):
    """Build the Jinja environment used to render the documentation.

    Args:
      site_root: URL prefix prepended by the link filters.
      link_ext: file extension appended by the ``doc_link`` filter.

    Returns:
      A configured ``jinja2.Environment`` with ``markdown``, ``doc_link``
      and ``link`` filters registered.
    """
    env = jinja2.Environment(
        # Fix: the loader class is FileSystemLoader (capital L);
        # ``jinja2.FileSystemloader`` raises AttributeError.
        loader=jinja2.FileSystemLoader(_runfile_path(TEMPLATE_PATH)),
        keep_trailing_newline=True,
        line_statement_prefix='%')
    env.filters['markdown'] = lambda text: jinja2.Markup(mistune.markdown(text))
    env.filters['doc_link'] = (
        lambda fname: site_root + '/' + fname + '.' + link_ext)
    env.filters['link'] = lambda fname: site_root + '/' + fname
    return env
# TODO(dzc): Remove this workaround once we switch to a self-contained Python
# binary format such as PEX.
def _write_ruleset(self, output_dir, ruleset):
    """Render a single ruleset to a Markdown file under ``output_dir``.

    Output files are created in a directory structure that matches that
    of the input file. Returns ``(output_file, output_path)``.
    """
    # Load template and render Markdown.
    template = self.__env.get_template('markdown.jinja')
    out = template.render(ruleset=ruleset)
    output_path = ruleset.output_file + '.md'
    output_file = "%s/%s" % (output_dir, output_path)
    # Fix: exist_ok avoids the race between an exists() check and
    # makedirs() when several writers target the same directory.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "w") as f:
        f.write(out)
    return (output_file, output_path)
def _write_ruleset(self, ruleset, nav):
    """Render a single ruleset to an HTML file.

    NOTE(review): this snippet appears truncated by the scrape --
    ``output_dir`` is never defined in this scope, and the function ends
    right after computing ``output_file``; the actual write step is
    presumably missing. Do not rely on this block as-is.
    """
    # Load template and render markdown.
    template = self.__env.get_template('html.jinja')
    out = template.render(title=ruleset.title, ruleset=ruleset, nav=nav)
    # Write output to file. Output files are created in a directory structure
    # that matches that of the input file.
    output_path = ruleset.output_file + '.html'
    output_file = "%s/%s" % (output_dir, output_path)
def main():
    """Scrape the wer_are_we README tables and regenerate data/wer.py.

    NOTE(review): Python 2 code -- it uses ``urllib2`` and writes a str
    to a file opened in binary mode. ``get_metrics`` and ``add_measures``
    are defined elsewhere in the original project; their contracts are
    inferred here from usage only.
    """
    # Fetch the README and render it so markdown tables become <table> HTML.
    md = urllib2.urlopen('https://raw.githubusercontent.com/syhw/wer_are_we/master/README.md').read()
    bs = BeautifulSoup(mistune.markdown(md))
    wer_data_file = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/wer.py"))
    # Header of the generated module (imports for the generated data file).
    file_output = "# The file was autogenerated by ../scrapers/wer.py\n\nfrom datetime import date\n\nfrom data.acoustics import speech_recognition,swb_hub_500\nfrom scales import *\n\n"
    wer_metrics = []
    # Pairing assumes each <h3> heading precedes its corresponding <table>.
    for table, header in zip(bs.findAll('table'), bs.findAll('h3')):
        header = header.getText()
        rows = table.findAll('tr')
        # First row holds the column headers; the last three columns are skipped.
        metric_data = get_metrics(header, rows[0].findAll('th')[:-3], file_output)
        metric_names = metric_data[0]
        wer_metrics += metric_names
        table_data = []
        for row in rows:
            # Header rows have no <td> cells -- skip them.
            if row.findAll('td') == []:
                continue
            measure_data, targets, target_source = add_measures(metric_names, row)
            if not targets:
                table_data += measure_data
            elif not measure_data:
                # NOTE(review): this call passes file_output where the call
                # above passes the header cells -- confirm get_metrics
                # really accepts both argument shapes.
                metric_data = get_metrics(header, file_output, targets = targets, target_source = target_source)
                file_output = metric_data[1]
        file_output += "".join(sorted(table_data))
    file_output = file_output + "\n\nwer_metrics=[" + ",".join(wer_metrics) + "]"
    with open(wer_data_file, 'wb') as f:
        f.write(file_output)
def markdown(text):
    """Render ``text`` with the module-level markdown renderer, escaping
    raw HTML, and wrap the result as safe markup."""
    rendered = md.markdown(text, escape=True)
    return Markup(rendered)
def process_readme(idx, readme_filename, s3_bucket):
    """Extract readme text for one npm package and run the tagging pipeline.

    Reads ``readme_filename`` from ``s3_bucket`` (falling back to the
    NPMJS registry description), normalises Markdown/plaintext content,
    writes it to a scratch file under ``PATH_PREFIX`` and feeds that file
    to ``run_pipeline``; discovered tags are stored in the module-level
    ``tags_dict`` keyed by package name. Non-npm keys are ignored.
    """
    if not readme_filename.startswith('npm/'):
        return
    package_name = readme_filename[len('npm/'):]
    if package_name.endswith('/README.json'):
        package_name = package_name[:-len('/README.json')]
    try:
        readme_content = s3_bucket.read_json_file(readme_filename)
    except Exception:
        _logger.warning("[MISSING_DATA] Readme/NPMJS description for package {} does "
                        "not exist in S3.".format(package_name))
        return
    if not readme_content:
        # No readme in S3 -- fall back to the registry description.
        npmjs_description = getNPMdescription(package_name)
        if not npmjs_description:
            _logger.warning("[MISSING_DATA] Readme/NPMJS description for package {} does "
                            "not exist in S3.".format(package_name))
            return
        readme_content = {
            'type': 'plaintext',
            'content': npmjs_description
        }
    if readme_content['type'] not in ('Markdown', 'plaintext'):
        _logger.warning("[FORMAT] Skipping {},content is not in markdown format"
                        " but in {}.".format(readme_filename, readme_content['type']))
        return
    readme_content['content'] = returnContentIfAscii(
        readme_content['content'].replace('\n', ' '))
    if not readme_content['content']:
        _logger.warning("[ENCODING] Ignoring package {} as the readme is not in"
                        " ascii".format(package_name))
        return
    if readme_content['type'] == 'Markdown':
        try:
            readme_content = markdown_preprocess(
                readme_content['content'])
        except Exception:
            _logger.warning(
                "[CONTENT] Could not get tags for {}".format(package_name))
            return
    else:
        readme_content = readme_content['content']
    scratch_path = os.path.join(PATH_PREFIX, package_name.replace('/', ':::'))
    # Fix: the redundant of.close() inside the with-block is gone; closing
    # the with-block flushes the file before the pipeline reads it.
    with open(scratch_path, 'w') as of:
        of.write(json.dumps({"id": idx, "text": readme_content}))
    try:
        tags = run_pipeline(scratch_path)
        if tags:
            print(tags)
            tags_dict[package_name] = tags
    except Exception:
        _logger.warning(
            "[CONTENT] Could not get tags for {}".format(package_name))
    # NOTE(review): scratch-file cleanup is placed after the try/except so
    # it runs on both success and failure; the original's flattened
    # indentation made its placement ambiguous -- confirm intent.
    os.remove(scratch_path)
def main(argv):
    """Entry point: extract docs from .bzl files and emit markdown/HTML.

    ``argv[1:]`` are the input .bzl files; all other behaviour is
    controlled by the module-level FLAGS. Exits nonzero on any input or
    extraction error.
    """
    if FLAGS.output_dir and FLAGS.output_file:
        sys.stderr.write('Only one of --output_dir or --output_file can be set.')
        sys.exit(1)
    if not FLAGS.output_dir:
        FLAGS.output_dir = DEFAULT_OUTPUT_DIR
    if not FLAGS.output_file:
        FLAGS.output_file = DEFAULT_OUTPUT_FILE
    bzl_files = argv[1:]
    try:
        strip_prefix = common.validate_strip_prefix(FLAGS.strip_prefix, bzl_files)
    except common.InputError as err:
        print(err.message)
        sys.exit(1)
    rulesets = []
    load_sym_extractor = load_extractor.LoadExtractor()
    for bzl_file in bzl_files:
        load_symbols = []
        try:
            load_symbols = load_sym_extractor.extract(bzl_file)
        except load_extractor.LoadExtractorError as e:
            print("ERROR: Error extracting loaded symbols from %s: %s" %
                  (bzl_file, str(e)))
            sys.exit(2)
        # TODO(dzc): Make MacroDocExtractor and RuleDocExtractor stateless.
        # Fix: the class name is MacroDocExtractor (capital D); the scraped
        # ``MacrodocExtractor`` would raise AttributeError.
        macro_doc_extractor = macro_extractor.MacroDocExtractor()
        rule_doc_extractor = rule_extractor.RuleDocExtractor()
        macro_doc_extractor.parse_bzl(bzl_file)
        rule_doc_extractor.parse_bzl(bzl_file, load_symbols)
        merged_language = merge_languages(macro_doc_extractor.proto(),
                                          rule_doc_extractor.proto())
        rulesets.append(
            rule.RuleSet(bzl_file, merged_language, macro_doc_extractor.title,
                         macro_doc_extractor.description, strip_prefix,
                         FLAGS.format))
    writer_options = WriterOptions(
        FLAGS.output_dir, FLAGS.output_file, FLAGS.zip, FLAGS.overview,
        FLAGS.overview_filename, FLAGS.link_ext, FLAGS.site_root)
    if FLAGS.format == "markdown":
        markdown_writer = MarkdownWriter(writer_options)
        markdown_writer.write(rulesets)
    elif FLAGS.format == "html":
        html_writer = HtmlWriter(writer_options)
        html_writer.write(rulesets)
    else:
        sys.stderr.write(
            'Invalid output format: %s. Possible values are markdown and html'
            % FLAGS.format)
        # Fix: exit nonzero on an invalid format, consistent with the
        # other error paths above.
        sys.exit(1)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。