Syntax highlighting mit BeautifulSoup4
Da ich ein kleines Problem mit den HTML-Entities hatte, habe ich die Gunst der Stunde genutzt, um das Snippet „Markdown and Syntax Highlighting in Django“ auf BeautifulSoup4 zu aktualisieren.
Nun funktionieren auch wieder <, >, & usw.
from django import template
# Template library instance; the filter in this module is registered on it.
register = template.Library()
# Pygments: http://pygments.org -- a generic syntax highlighter.
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, guess_lexer
# Python Markdown
from markdown import markdown
# HTMLParser is used to unescape HTML entities in the extracted code blocks.
from HTMLParser import HTMLParser
# BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup/
from bs4 import BeautifulSoup, Tag
def _select_lexer(language, source):
    """Return a Pygments lexer for *language*, with fallbacks.

    The lexer registered under the name *language* is tried first. If that
    lookup raises ``ValueError``, Pygments is asked to guess a lexer from
    *source*; if guessing raises ``ValueError`` as well, the plain-text
    lexer is returned.
    """
    try:
        return get_lexer_by_name(language, stripnl=True, encoding='UTF-8')
    except ValueError:
        pass
    try:
        # Guess a lexer by the contents of the block.
        return guess_lexer(source)
    except ValueError:
        # Just make it plain text.
        return get_lexer_by_name('text', stripnl=True, encoding='UTF-8')


@register.filter(is_safe=True)
def render(content, safe="unsafe"):
    """Render this content for display.

    Every ``<code>`` element is pulled out of *content* and replaced by an
    empty ``<removed>`` placeholder, the remainder is run through Markdown,
    and each placeholder is then replaced by a ``<code>`` element holding
    the Pygments-highlighted version of the original block.

    content -- the markup to render; it is converted with ``unicode()``.
    safe    -- the string "unsafe" (default) runs Markdown with
               ``safe_mode=False``; any other value uses ``safe_mode=True``.

    Returns the resulting document as ``str(soup)``.
    """
    # First, pull out all the code blocks, to keep them away
    # from Markdown (and preserve whitespace).
    soup = BeautifulSoup(unicode(content), 'html.parser')
    code_blocks = soup.findAll('code')
    for block in code_blocks:
        block.replaceWith(Tag(name='removed'))

    # Run the post through markdown.
    safe_mode = safe != "unsafe"
    markeddown = markdown(unicode(soup), safe_mode=safe_mode)

    # Replace the pulled code blocks with syntax-highlighted versions.
    soup = BeautifulSoup(markeddown, 'html.parser')
    placeholders = soup.findAll('removed')
    formatter = HtmlFormatter(cssclass='source')
    html_parser = HTMLParser()

    # Pair blocks with placeholders via zip rather than by a running index:
    # if fewer placeholders than blocks come back (e.g. a <code> nested in
    # another <code>, or -- presumably -- Markdown's safe mode stripping raw
    # HTML; TODO confirm), the unmatched blocks are skipped instead of
    # raising IndexError.
    for block, placeholder in zip(code_blocks, placeholders):
        # class="python" selects the lexer; without a usable class the
        # block is treated as plain text, whitespace-preserved.
        classes = block.get('class')
        language = classes[0] if classes else 'text'

        # Unescape entities first so that both lexer guessing and
        # highlighting see the real source text (<, >, & ...).
        unescaped_content = html_parser.unescape(block.renderContents())
        lexer = _select_lexer(language, unescaped_content)

        tag = Tag(name='code')
        tag['class'] = language
        highlighted = highlight(unescaped_content, lexer, formatter)
        tag.insert(0, BeautifulSoup(highlighted, 'html.parser'))
        placeholder.replaceWith(tag)

    return str(soup)
Die letzten Einträge
- HTML UTF-8 Kodierung Mo 5.08.13 17:51
- Syntax highlighting mit BeautifulSoup4 Di 2.07.13 17:04
- Git Repositorys verwalten mit Gitolite Do 15.03.12 17:56
- Website verfügbarkeit mit Nagios Sa 21.01.12 17:40
- Checkbox für ManyToManyField in Django Fr 14.10.11 15:57