microblog/app/utils/highlight.py

33 lines
1005 B
Python
Raw Normal View History

2022-06-22 18:11:22 +00:00
from functools import lru_cache
from bs4 import BeautifulSoup # type: ignore
from pygments import highlight as phighlight # type: ignore
from pygments.formatters import HtmlFormatter # type: ignore
from pygments.lexers import guess_lexer # type: ignore
from app.config import CODE_HIGHLIGHTING_THEME
# Module-wide Pygments HTML formatter, styled with the configured
# CODE_HIGHLIGHTING_THEME; built once at import time and reused below.
_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
2022-06-22 18:11:22 +00:00
# Style definitions (CSS) generated by the shared formatter for its theme.
# NOTE(review): presumably embedded in the page templates -- confirm with callers.
HIGHLIGHT_CSS = _FORMATTER.get_style_defs()
@lru_cache(256)
def highlight(html: str) -> str:
    """Syntax-highlight every ``<pre><code>`` block found in *html*.

    The fragment is parsed with html5lib. Each ``<code>`` element whose
    direct parent is a ``<pre>`` has its content run through Pygments (the
    lexer is guessed from the content) and the whole ``<pre>`` is replaced
    by the generated markup. ``<code>`` elements outside a ``<pre>`` are
    left untouched.

    Results are memoized per input string (up to 256 entries).

    Args:
        html: HTML fragment to process.

    Returns:
        The processed document body, serialized with the ``<body>`` tag
        renamed to ``<div>``.
    """
    # Local import keeps this function self-contained. Only ``unescape`` is
    # bound, so it does not clash with the ``html`` parameter.
    from html import unescape

    soup = BeautifulSoup(html, "html5lib")
    for code in soup.find_all("code"):
        pre = code.parent
        if pre.name != "pre":
            continue

        # Some senders put <br> elements inside code blocks instead of real
        # newlines; turn them back into line breaks before lexing.
        code_content = (
            code.encode_contents().decode().replace("<br>", "\n").replace("<br/>", "\n")
        )
        # encode_contents() returns serialized markup, so "<", ">" and "&"
        # arrive as entities. Decode them, otherwise Pygments escapes the
        # "&" a second time and the reader sees literal "&lt;" / "&gt;".
        code_content = unescape(code_content)

        lexer = guess_lexer(code_content)
        # Pygments returns an HTML string; re-parse it and take the first
        # node inside <body> (the generated wrapper element).
        tag = BeautifulSoup(
            phighlight(code_content, lexer, _FORMATTER), "html5lib"
        ).body.next_element
        pre.replace_with(tag)

    # html5lib always wraps the fragment in <html><body>; hand back the body
    # as a plain <div> so it can be embedded in another page.
    out = soup.body
    out.name = "div"
    return str(out)