More Markdown improvements

2022-10-05 20:05:16 +02:00 · 2022-10-05 20:05:16 +02:00 · 5eaa0f291b
parent 881d0ad899
commit 5eaa0f291b
2 changed files with 59 additions and 31 deletions
--- a/app/activitypub.py
+++ b/app/activitypub.py
@@ -6,7 +6,6 @@ from typing import Any

 import httpx
 from loguru import logger
-from markdown import markdown

 from app import config
 from app.config import ALSO_KNOWN_AS
@@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE  # noqa: F401
 from app.config import MOVED_TO
 from app.httpsig import auth
 from app.key import get_pubkey_as_pem
+from app.source import dedup_tags
 from app.source import hashtagify
 from app.utils.url import check_url

@@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):


 _LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
+_LOCAL_ACTOR_METADATA = []
+if config.CONFIG.metadata:
+    for kv in config.CONFIG.metadata:
+        kv_value, kv_tags = hashtagify(kv.value)
+        _LOCAL_ACTOR_METADATA.append(
+            {
+                "name": kv.key,
+                "type": "PropertyValue",
+                "value": kv_value,
+            }
+        )
+        _LOCAL_ACTOR_TAGS.extend(kv_tags)
+

 ME = {
    "@context": AS_EXTENDED_CTX,
@@ -113,7 +126,7 @@ ME = {
    "outbox": config.BASE_URL + "/outbox",
    "preferredUsername": config.USERNAME,
    "name": config.CONFIG.name,
-    "summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]),
+    "summary": _LOCAL_ACTOR_SUMMARY,
    "endpoints": {
        # For compat with servers expecting a sharedInbox...
        "sharedInbox": config.BASE_URL
@@ -121,16 +134,7 @@ ME = {
    },
    "url": config.ID + "/",  # XXX: the path is important for Mastodon compat
    "manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
-    "attachment": [
-        {
-            "name": kv.key,
-            "type": "PropertyValue",
-            "value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
-        }
-        for kv in config.CONFIG.metadata
-    ]
-    if config.CONFIG.metadata
-    else [],
+    "attachment": _LOCAL_ACTOR_METADATA,
    "icon": {
        "mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
        "type": "Image",
@@ -141,7 +145,7 @@ ME = {
        "owner": config.ID,
        "publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
    },
-    "tag": _LOCAL_ACTOR_TAGS,
+    "tag": dedup_tags(_LOCAL_ACTOR_TAGS),
 }

 if ALSO_KNOWN_AS:
--- a/app/source.py
+++ b/app/source.py
@@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:

 _FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
 _HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
-_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
+_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+_URL_REGEX = re.compile(
+    "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
+)


 class AutoLink(SpanToken):
    parse_inner = False
    precedence = 10
-    pattern = re.compile(
-        "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
-    )
+    pattern = _URL_REGEX

    def __init__(self, match_obj: re.Match) -> None:
        self.target = match_obj.group()
@@ -38,7 +39,7 @@ class AutoLink(SpanToken):
 class Mention(SpanToken):
    parse_inner = False
    precedence = 10
-    pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+    pattern = _MENTION_REGEX

    def __init__(self, match_obj: re.Match) -> None:
        self.target = match_obj.group()
@@ -47,7 +48,7 @@ class Mention(SpanToken):
 class Hashtag(SpanToken):
    parse_inner = False
    precedence = 10
-    pattern = re.compile(r"(#[\d\w]+)")
+    pattern = _HASHTAG_REGEX

    def __init__(self, match_obj: re.Match) -> None:
        self.target = match_obj.group()
@@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):

    def render_hashtag(self, token: Hashtag) -> str:
        tag = token.target[1:]
-        link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
+        link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
        self.tags.append(
-            dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag")
+            dict(
+                href=f"{BASE_URL}/t/{tag.lower()}",
+                name=token.target.lower(),
+                type="Hashtag",
+            )
        )
        return link

@@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
    return actors


-def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]:
-    # TODO: fix this, switch to mistletoe?
+def hashtagify(
+    content: str,
+) -> tuple[str, list[dict[str, str]]]:
    tags = []
-    hashtags = re.findall(_HASHTAG_REGEX, content)
-    hashtags = sorted(set(hashtags), reverse=True)  # unique tags, longest first
-    for hashtag in hashtags:
-        tag = hashtag[1:]
-        link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
-        tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag"))
-        content = content.replace(hashtag, link)
-    return content, tags
+    with CustomRenderer(
+        mentioned_actors={},
+        enable_mentionify=False,
+        enable_hashtagify=True,
+    ) as renderer:
+        rendered_content = renderer.render(Document(content))
+        tags.extend(renderer.tags)
+
+    # Handle custom emoji
+    tags.extend(emoji.tags(content))
+
+    return rendered_content, tags


 async def markdownify(
@@ -174,3 +184,17 @@ async def markdownify(
    tags.extend(emoji.tags(content))

    return rendered_content, tags, list(mentioned_actors.values())
+
+
+def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
+    idx = set()
+    deduped_tags = []
+    for tag in tags:
+        tag_idx = (tag["type"], tag["name"])
+        if tag_idx in idx:
+            continue
+
+        idx.add(tag_idx)
+        deduped_tags.append(tag)
+
+    return deduped_tags