Fix OG metadata processing

main
Thomas Sileo 2022-08-28 19:05:06 +02:00
parent 87f035d298
commit 1a7e9e4565
2 changed files with 22 additions and 6 deletions

View File

@ -112,10 +112,13 @@ async def process_next_incoming_activity(
if next_activity.ap_object and next_activity.sent_by_ap_actor_id: if next_activity.ap_object and next_activity.sent_by_ap_actor_id:
try: try:
async with db_session.begin_nested(): async with db_session.begin_nested():
await save_to_inbox( await asyncio.wait_for(
db_session, save_to_inbox(
next_activity.ap_object, db_session,
next_activity.sent_by_ap_actor_id, next_activity.ap_object,
next_activity.sent_by_ap_actor_id,
),
timeout=60,
) )
except httpx.TimeoutException as exc: except httpx.TimeoutException as exc:
url = exc._request.url if exc._request else None url = exc._request.url if exc._request else None

View File

@ -1,3 +1,4 @@
import asyncio
import mimetypes import mimetypes
import re import re
from typing import Any from typing import Any
@ -36,7 +37,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
# FIXME some page have no <title> # FIXME some page have no <title>
raw = { raw = {
"url": url, "url": url,
"title": soup.find("title").text, "title": soup.find("title").text.strip(),
"image": None, "image": None,
"description": None, "description": None,
"site_name": urlparse(url).hostname, "site_name": urlparse(url).hostname,
@ -124,9 +125,21 @@ async def og_meta_from_note(
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
og_meta = [] og_meta = []
urls = await external_urls(db_session, ro) urls = await external_urls(db_session, ro)
logger.debug(f"Lookig OG metadata in {urls=}")
for url in urls: for url in urls:
logger.debug(f"Processing {url}")
try: try:
maybe_og_meta = await _og_meta_from_url(url) maybe_og_meta = None
try:
maybe_og_meta = await asyncio.wait_for(
_og_meta_from_url(url),
timeout=5,
)
except asyncio.TimeoutError:
logger.info(f"Timing out fetching {url}")
except Exception:
logger.exception(f"Failed scrap OG meta for {url}")
if maybe_og_meta: if maybe_og_meta:
og_meta.append(maybe_og_meta.dict()) og_meta.append(maybe_og_meta.dict())
except httpx.HTTPError: except httpx.HTTPError: