import re
import typing

from loguru import logger
from mistletoe import Document  # type: ignore
from mistletoe.html_renderer import HTMLRenderer  # type: ignore
from mistletoe.span_token import SpanToken  # type: ignore
from pygments import highlight  # type: ignore
from pygments.formatters import HtmlFormatter  # type: ignore
from pygments.lexers import get_lexer_by_name as get_lexer  # type: ignore
from pygments.lexers import guess_lexer  # type: ignore
from sqlalchemy import select

from app import webfinger
from app.config import BASE_URL
from app.config import CODE_HIGHLIGHTING_THEME
from app.database import AsyncSession
from app.utils import emoji

if typing.TYPE_CHECKING:
    from app.actor import Actor


_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
_URL_REGEX = re.compile(
    "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
)
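

# Custom mistletoe span tokens: each one is matched against the raw Markdown
# with the regex in `pattern` and keeps the matched text as `target`, so the
# renderer below can turn it into a link.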
class AutoLink(SpanToken):
    parse_inner = False
    precedence = 1
    pattern = _URL_REGEX

    def __init__(self, match_obj: re.Match) -> None:
        self.target = match_obj.group()


class Mention(SpanToken):
    parse_inner = False
    precedence = 10
    pattern = _MENTION_REGEX

    def __init__(self, match_obj: re.Match) -> None:
        self.target = match_obj.group()


class Hashtag(SpanToken):
    parse_inner = False
    precedence = 10
    pattern = _HASHTAG_REGEX

    def __init__(self, match_obj: re.Match) -> None:
        self.target = match_obj.group()
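

# HTMLRenderer subclass that registers the extra span tokens and records the
# Mention/Hashtag `tag` dicts it encounters while rendering (exposed as
# `self.tags`).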
class CustomRenderer(HTMLRenderer):
    def __init__(
        self,
        mentioned_actors: dict[str, "Actor"] = {},
        enable_mentionify: bool = True,
        enable_hashtagify: bool = True,
    ) -> None:
        extra_tokens = []
        if enable_mentionify:
            extra_tokens.append(Mention)
        if enable_hashtagify:
            extra_tokens.append(Hashtag)
        super().__init__(AutoLink, *extra_tokens)

        self.tags: list[dict[str, str]] = []
        self.mentioned_actors = mentioned_actors

    def render_auto_link(self, token: AutoLink) -> str:
        template = '<a href="{target}" rel="noopener">{inner}</a>'
        target = self.escape_url(token.target)
        return template.format(target=target, inner=target)

    def render_mention(self, token: Mention) -> str:
        mention = token.target
        suffix = ""
        if mention.endswith("."):
            mention = mention[:-1]
            suffix = "."
        actor = self.mentioned_actors.get(mention)
        if not actor:
            return mention

        self.tags.append(dict(type="Mention", href=actor.ap_id, name=mention))

        link = f'<span class="h-card"><a href="{actor.url}" class="u-url mention">{actor.handle}</a></span>{suffix}'  # noqa: E501
        return link

    def render_hashtag(self, token: Hashtag) -> str:
        tag = token.target[1:]
        link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
        self.tags.append(
            dict(
                href=f"{BASE_URL}/t/{tag.lower()}",
                name=token.target.lower(),
                type="Hashtag",
            )
        )
        return link

    def render_block_code(self, token: typing.Any) -> str:
        code = token.children[0].content
        lexer = get_lexer(token.language) if token.language else guess_lexer(code)
        return highlight(code, lexer, _FORMATTER)
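

# Resolve every mention found in `content` to an Actor, first via the local
# database and then falling back to a webfinger lookup + remote fetch.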
async def _prefetch_mentioned_actors(
    db_session: AsyncSession,
    content: str,
) -> dict[str, "Actor"]:
    from app import models
    from app.actor import fetch_actor

    actors = {}

    for mention in re.findall(_MENTION_REGEX, content):
        if mention in actors:
            continue

        # XXX: the regex catches stuff like `@toto@example.com.`
        if mention.endswith("."):
            mention = mention[:-1]

        try:
            _, username, domain = mention.split("@")
            actor = (
                await db_session.execute(
                    select(models.Actor).where(
                        models.Actor.handle == mention,
                        models.Actor.is_deleted.is_(False),
                    )
                )
            ).scalar_one_or_none()
            if not actor:
                actor_url = await webfinger.get_actor_url(mention)
                if not actor_url:
                    # FIXME(ts): raise an error?
                    continue
                actor = await fetch_actor(db_session, actor_url)

            actors[mention] = actor
        except Exception:
            logger.exception(f"Failed to prefetch {mention}")

    return actors
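

# Like `markdownify` below, but only collects hashtag and custom emoji tags;
# mentions are not resolved, so no database session is needed.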
def hashtagify(
    content: str,
) -> tuple[str, list[dict[str, str]]]:
    tags = []
    with CustomRenderer(
        mentioned_actors={},
        enable_mentionify=False,
        enable_hashtagify=True,
    ) as renderer:
        rendered_content = renderer.render(Document(content))
        tags.extend(renderer.tags)

    # Handle custom emoji
    tags.extend(emoji.tags(content))

    return rendered_content, tags


async def markdownify(
    db_session: AsyncSession,
    content: str,
    enable_mentionify: bool = True,
    enable_hashtagify: bool = True,
) -> tuple[str, list[dict[str, str]], list["Actor"]]:
    """Render user-submitted Markdown to HTML.

    Returns the rendered HTML, the collected tags (mentions, hashtags and
    custom emoji, deduplicated) and the list of mentioned actors:

        content, tags, mentioned_actors = await markdownify(db_session, "Hello")
    """
    tags = []
    mentioned_actors: dict[str, "Actor"] = {}
    if enable_mentionify:
        mentioned_actors = await _prefetch_mentioned_actors(db_session, content)

    with CustomRenderer(
        mentioned_actors=mentioned_actors,
        enable_mentionify=enable_mentionify,
        enable_hashtagify=enable_hashtagify,
    ) as renderer:
        rendered_content = renderer.render(Document(content))
        tags.extend(renderer.tags)

    # Handle custom emoji
    tags.extend(emoji.tags(content))

    return rendered_content, dedup_tags(tags), list(mentioned_actors.values())


def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
    idx = set()
    deduped_tags = []
    for tag in tags:
        tag_idx = (tag["type"], tag["name"])
        if tag_idx in idx:
            continue

        idx.add(tag_idx)
        deduped_tags.append(tag)

    return deduped_tags