From 9733c0b5c887f490e2705b22319854d09a652de4 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Tue, 12 Jul 2022 22:24:15 +0200 Subject: [PATCH] Improve code highlight --- app/scss/main.scss | 1 + app/source.py | 2 +- app/utils/highlight.py | 33 ++++++++++++++++++++++++--------- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/app/scss/main.scss b/app/scss/main.scss index 10cdace..2b1e95c 100644 --- a/app/scss/main.scss +++ b/app/scss/main.scss @@ -50,6 +50,7 @@ div.highlight { padding: 10px; overflow: auto; display: block; + margin: 20px 0; } .box { diff --git a/app/source.py b/app/source.py index 70c6e03..cd80bc1 100644 --- a/app/source.py +++ b/app/source.py @@ -88,6 +88,6 @@ async def markdownify( # Handle custom emoji tags.extend(emoji.tags(content)) - content = markdown(content, extensions=["mdx_linkify", "fenced_code", "codehilite"]) + content = markdown(content, extensions=["mdx_linkify", "fenced_code"]) return content, tags, mentioned_actors diff --git a/app/utils/highlight.py b/app/utils/highlight.py index 66ecbb1..4915527 100644 --- a/app/utils/highlight.py +++ b/app/utils/highlight.py @@ -3,6 +3,7 @@ from functools import lru_cache from bs4 import BeautifulSoup # type: ignore from pygments import highlight as phighlight # type: ignore from pygments.formatters import HtmlFormatter # type: ignore +from pygments.lexers import get_lexer_by_name # type: ignore from pygments.lexers import guess_lexer # type: ignore from app.config import CODE_HIGHLIGHTING_THEME @@ -18,15 +19,29 @@ def highlight(html: str) -> str: for code in soup.find_all("code"): if not code.parent.name == "pre": continue + + # Replace
<br> tags with line breaks (Mastodon sends code like this) code_content = ( code.encode_contents().decode().replace("<br>", "\n").replace("<br/>", "\n") ) - lexer = guess_lexer(code_content) - tag = BeautifulSoup( - phighlight(code_content, lexer, _FORMATTER), "html5lib" - ).body.next - pre = code.parent - pre.replaceWith(tag) - out = soup.body - out.name = "div" - return str(out) + + # If this comes from a microblog.pub instance we may have the language + # in the class name + if "class" in code.attrs and code.attrs["class"][0].startswith("language-"): + try: + lexer = get_lexer_by_name( + code.attrs["class"][0].removeprefix("language-") + ) + except Exception: + lexer = guess_lexer(code_content) + else: + lexer = guess_lexer(code_content) + + # Replace the code with Pygment output + code.parent.replaceWith( + BeautifulSoup( + phighlight(code_content, lexer, _FORMATTER), "html5lib" + ).body.next + ) + + return soup.body.encode_contents().decode()