Syntax highlighting mit BeautifulSoup4

Da ich ein kleines Problem mit den HTML-Entities hatte, habe ich die Gunst der Stunde genutzt, das Snippet „Markdown and Syntax Highlighting in Django“ auf BeautifulSoup4 upzudaten.

from django import template
register = template.Library()

# Pygments: http://pygments.org -- a generic syntax highlighter.
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name, guess_lexer

# Python Markdown
from markdown import markdown

# Need to unescape HTML Entities
from HTMLParser import HTMLParser

# BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup/
from bs4 import BeautifulSoup, Tag 

def _pick_lexer(language, source):
    """Return a Pygments lexer for ``language``, falling back on ``source``.

    The lookup order is: the lexer registered under ``language``, then a
    lexer guessed from the text of ``source``, then the plain-text lexer.
    """
    try:
        return get_lexer_by_name(language, stripnl=True, encoding='UTF-8')
    except ValueError:
        # Unknown language name; fall through to guessing.
        pass
    try:
        # Guess a lexer by the contents of the block.
        return guess_lexer(source)
    except ValueError:
        # Just make it plain text.
        return get_lexer_by_name('text', stripnl=True, encoding='UTF-8')


@register.filter(is_safe=True)
def render(content, safe="unsafe"):
    """Render this content for display.

    Every ``<code>`` element is taken out of ``content`` before the rest
    is run through Markdown, and is afterwards put back as a ``<code>``
    element containing Pygments-highlighted markup.

    ``content`` is converted with ``unicode()`` before parsing.  ``safe``
    selects Markdown's ``safe_mode``: the default value ``"unsafe"`` turns
    it off, any other value turns it on.

    The first entry of a block's ``class`` attribute (for example
    ``class="python"``) is used as the lexer name; blocks without a class
    are treated as ``text``.

    Returns ``str(soup)`` of the final document.
    """
    # First, pull out all the code blocks, to keep them away
    # from Markdown (and preserve whitespace).
    soup = BeautifulSoup(unicode(content), 'html.parser')
    code_blocks = soup.find_all('code')
    for block in code_blocks:
        block.replace_with(Tag(name='removed'))

    # Run the post through markdown.
    markeddown = markdown(unicode(soup), safe_mode=(safe != "unsafe"))

    # Replace the pulled code blocks with syntax-highlighted versions.
    soup = BeautifulSoup(markeddown, 'html.parser')
    placeholders = soup.find_all('removed')
    formatter = HtmlFormatter(cssclass='source')
    unescape = HTMLParser().unescape
    for index, block in enumerate(code_blocks):
        if block.has_attr('class'):
            # class="python"
            language = block['class'][0]
        else:
            # plain text, whitespace-preserved
            language = 'text'

        # Work on the unicode rendering of the block so that unescaping
        # entities never mixes byte strings with unicode characters.
        source = unescape(block.decode_contents())
        lexer = _pick_lexer(language, source)

        tag = Tag(name='code')
        tag['class'] = language
        highlighted = highlight(source, lexer, formatter)
        tag.insert(0, BeautifulSoup(highlighted, 'html.parser'))

        # NOTE(review): this raises IndexError when Markdown did not keep
        # a placeholder for the block -- presumably possible with
        # safe_mode enabled; confirm against the Markdown version in use.
        placeholders[index].replace_with(tag)

    return str(soup)
Nun funktionieren auch wieder <, >, & usw.

Tags:

Die letzten Einträge