From 735da3498d1e06b1ea2d08df749832516382dd3b Mon Sep 17 00:00:00 2001 From: Jordan Santell Date: Tue, 21 Oct 2025 15:29:49 -0700 Subject: [PATCH] chore: Update rss recipe to use handlers again, pull out and test RSS parsing --- .../{dom-parser.test.ts => rss-utils.test.ts} | 26 +++- recipes/rss-utils.ts | 113 +++++++++++++++ recipes/rss.tsx | 133 ++---------------- 3 files changed, 148 insertions(+), 124 deletions(-) rename recipes/{dom-parser.test.ts => rss-utils.test.ts} (75%) create mode 100644 recipes/rss-utils.ts diff --git a/recipes/dom-parser.test.ts b/recipes/rss-utils.test.ts similarity index 75% rename from recipes/dom-parser.test.ts rename to recipes/rss-utils.test.ts index 84a7985a2..c7560ab9b 100644 --- a/recipes/dom-parser.test.ts +++ b/recipes/rss-utils.test.ts @@ -1,5 +1,6 @@ import { DOMParser } from "./dom-parser.ts"; -import { assert } from "@std/assert"; +import { assert, assertObjectMatch } from "@std/assert"; +import { FeedItem, parseRSSFeed } from "./rss-utils.ts"; const xml = ` @@ -38,7 +39,7 @@ const xml = ` `; -Deno.test("DOMParser parsers XML", () => { +Deno.test("DOMParser/XML", () => { const parser = new DOMParser(); const doc = parser.parseFromString(xml, "text/xml"); const entries = doc.getElementsByTagName("entry"); @@ -60,3 +61,24 @@ Deno.test("DOMParser parsers XML", () => { "Get textContent", ); }); + +Deno.test("parseRSSFeed()", () => { + const entries = parseRSSFeed( + xml, + 5, + new Set(["tag:www.githubstatus.com,2005:Incident/26833707"]), + ); + assert( + entries.length === 2, + "Expecting 2 entries after filtering one existing", + ); + assertObjectMatch(entries[0], { + author: "", + id: "tag:www.githubstatus.com,2005:Incident/26837586", + pubDate: "2025-10-21T17:39:34Z", + title: "Disruption with some GitHub services", + link: "https://www.githubstatus.com/incidents/v61nk2fpysnq", + content: + "<p><small>Oct <var data-var='date'>21</var>, <var data-var='time'>17:39</var> UTC</small><br><strong>Resolved</strong> - This incident has been resolved. Thank you for your patience and understanding as we addressed this issue. A detailed root cause analysis will be shared as soon as it is available.</p><p><small>Oct <var data-var='date'>21</var>, <var data-var='time'>17:18</var> UTC</small><br><strong>Update</strong> - Mitigation continues, the impact is limited to Enterprise Cloud customers who have configured SAML at the organization level.</p><p><small>Oct <var data-var='date'>21</var>, <var data-var='time'>17:11</var> UTC</small><br><strong>Update</strong> - We continuing to work on mitigation of this issue.</p><p><small>Oct <var data-var='date'>21</var>, <var data-var='time'>16:33</var> UTC</small><br><strong>Update</strong> - We’ve identified the issue affecting some users with SAML/OIDC authentication and are actively working on mitigation. Some users may not be able to authenticate during this time.</p><p><small>Oct <var data-var='date'>21</var>, <var data-var='time'>16:03</var> UTC</small><br><strong>Update</strong> - We're seeing issues for a small amount of customers with SAML/OIDC authentication for GitHub.com users. We are investigating.</p><p><small>Oct <var data-var='date'>21</var>, <var data-var='time'>16:00</var> UTC</small><br><strong>Investigating</strong> - We are currently investigating this issue.</p>", + } as FeedItem); +}); diff --git a/recipes/rss-utils.ts b/recipes/rss-utils.ts new file mode 100644 index 000000000..a73b853b3 --- /dev/null +++ b/recipes/rss-utils.ts @@ -0,0 +1,113 @@ +import { DOMParser, type Element } from "./dom-parser.ts"; + +export type FeedItem = { + id: string; + title: string; + link: string; + description: string; + pubDate: string; + author: string; + content: string; +}; + +export function parseRSSFeed( + textXML: string, + maxResults: number = 100, + existingIds: Set, +): FeedItem[] { + const parser = new DOMParser(); + const doc = parser.parseFromString(textXML, "text/xml"); + // Helper function to get text content from an element + const getTextContent = (element: Element | null, tagName: string) => { + const el = element?.getElementsByTagName(tagName)[0]; + return el?.textContent?.trim() || ""; + }; + + // Helper function to get attribute value + const getAttributeValue = ( + element: Element | null, + tagName: string, + attrName: string, + ) => { + const el = element?.getElementsByTagName(tagName)[0]; + return el?.getAttribute(attrName) || ""; + }; + + const retrievedItems: FeedItem[] = []; + + // Check if it's an Atom feed + const isAtom = doc.getElementsByTagName("feed").length !== 0; + + if (isAtom) { + // Parse Atom feed + const entries = doc.getElementsByTagName("entry"); + + for (let i = 0; i < Math.min(entries.length, maxResults); i++) { + const entry = entries[i]; + + // In Atom, id is mandatory + const id = getTextContent(entry, "id") || Math.random().toString(36); + + // Skip if we already have this item + if (existingIds.has(id)) { + continue; + } + + // Parse link - in Atom links are elements with href attributes + const link = getAttributeValue(entry, "link", "href"); + + // For content, check content tag first, then summary + const content = getTextContent(entry, "content") || + getTextContent(entry, "summary"); + + // For author, it might be nested as Author + let author = ""; + const authorEl = entry.getElementsByTagName("author")[0]; + if (authorEl) { + author = getTextContent(authorEl, "name"); + } + + // For pubDate, Atom uses or + const pubDate = getTextContent(entry, "published") || + getTextContent(entry, "updated"); + + retrievedItems.push({ + id, + title: getTextContent(entry, "title"), + link, + description: getTextContent(entry, "summary"), + pubDate, + author, + content, + }); + } + } else { + // Parse RSS feed + const rssItems = doc.getElementsByTagName("item"); + + for (let i = 0; i < Math.min(rssItems.length, maxResults); i++) { + const item = rssItems[i]; + + const id = getTextContent(item, "guid") || + getTextContent(item, "link") || + Math.random().toString(36); + + if (existingIds.has(id)) { + continue; + } + + retrievedItems.push({ + id, + title: getTextContent(item, "title"), + link: getTextContent(item, "link"), + description: getTextContent(item, "description"), + pubDate: getTextContent(item, "pubDate"), + author: getTextContent(item, "author"), + content: getTextContent(item, "content:encoded") || + getTextContent(item, "description"), + }); + } + } + + return retrievedItems; +} diff --git a/recipes/rss.tsx b/recipes/rss.tsx index 3bcc05097..6b3835d68 100644 --- a/recipes/rss.tsx +++ b/recipes/rss.tsx @@ -1,138 +1,27 @@ /// import { + Cell, cell, Default, derive, fetchData, - lift, + handler, NAME, recipe, str, UI, } from "commontools"; -import { DOMParser, type Element } from "./dom-parser.ts"; +import { type FeedItem, parseRSSFeed } from "./rss-utils.ts"; interface Settings { feedUrl: Default; limit: Default; } -type FeedItem = { - id: string; - title: string; - link: string; - description: string; - pubDate: string; - author: string; - content: string; -}; - -function parseRSSFeed( - textXML: string, - maxResults: number = 100, - existingIds: Set, -): FeedItem[] { - const parser = new DOMParser(); - const doc = parser.parseFromString(textXML, "text/xml"); - // Helper function to get text content from an element - const getTextContent = (element: Element | null, tagName: string) => { - const el = element?.getElementsByTagName(tagName)[0]; - return el?.textContent?.trim() || ""; - }; - - // Helper function to get attribute value - const getAttributeValue = ( - element: Element | null, - tagName: string, - attrName: string, - ) => { - const el = element?.getElementsByTagName(tagName)[0]; - return el?.getAttribute(attrName) || ""; - }; - - const retrievedItems: FeedItem[] = []; - - // Check if it's an Atom feed - const isAtom = doc.getElementsByTagName("feed").length !== 0; - - if (isAtom) { - // Parse Atom feed - const entries = doc.getElementsByTagName("entry"); - - for (let i = 0; i < Math.min(entries.length, maxResults); i++) { - const entry = entries[i]; - - // In Atom, id is mandatory - const id = getTextContent(entry, "id") || Math.random().toString(36); - - // Skip if we already have this item - if (existingIds.has(id)) { - continue; - } - - // Parse link - in Atom links are elements with href attributes - const link = getAttributeValue(entry, "link", "href"); - - // For content, check content tag first, then summary - const content = getTextContent(entry, "content") || - getTextContent(entry, "summary"); - - // For author, it might be nested as Author - let author = ""; - const authorEl = entry.getElementsByTagName("author")[0]; - if (authorEl) { - author = getTextContent(authorEl, "name"); - } - - // For pubDate, Atom uses or - const pubDate = getTextContent(entry, "published") || - getTextContent(entry, "updated"); - - retrievedItems.push({ - id, - title: getTextContent(entry, "title"), - link, - description: getTextContent(entry, "summary"), - pubDate, - author, - content, - }); - } - } else { - // Parse RSS feed - const rssItems = doc.getElementsByTagName("item"); - - for (let i = 0; i < Math.min(rssItems.length, maxResults); i++) { - const item = rssItems[i]; - - const id = getTextContent(item, "guid") || - getTextContent(item, "link") || - Math.random().toString(36); - - if (existingIds.has(id)) { - continue; - } - - retrievedItems.push({ - id, - title: getTextContent(item, "title"), - link: getTextContent(item, "link"), - description: getTextContent(item, "description"), - pubDate: getTextContent(item, "pubDate"), - author: getTextContent(item, "author"), - content: getTextContent(item, "content:encoded") || - getTextContent(item, "description"), - }); - } - } - - return retrievedItems; -} - -const feedUpdater = lift<{ - items: FeedItem[]; +const feedUpdater = handler; settings: Settings; -}>(({ settings, items }) => { +}>((_, { items, settings }) => { if (!settings.feedUrl) { console.warn("no feed URL provided"); return; @@ -140,13 +29,13 @@ const feedUpdater = lift<{ const query = fetchData({ url: settings.feedUrl, mode: "text" }); return derive( - { items, result: query.result, limit: settings.limit }, - ({ result, limit, items }) => { - if (!result || typeof result !== "string") return; + { items, query, limit: settings.limit }, + ({ query, limit, items }) => { + if (!query.result || typeof query.result !== "string") return; const newEntries = parseRSSFeed( - result as string, + query.result as string, limit, - new Set(items.map((item) => item.id)), + new Set(items.get().map((item) => item.id)), ); items.push(...newEntries); },