chore: Update rss recipe to use handlers again, pull out and test RSS parsing (#1931)

jsantell · web-flow · commit 7b2cdc2d13fb · 2025-10-21T15:39:18.000-07:00
diff --git a/recipes/rss-utils.test.ts b/recipes/rss-utils.test.ts
@@ -1,5 +1,6 @@
 import { DOMParser } from "./dom-parser.ts";
-import { assert } from "@std/assert";
+import { assert, assertObjectMatch } from "@std/assert";
+import { FeedItem, parseRSSFeed } from "./rss-utils.ts";
 
 const xml = `<?xml version="1.0" encoding="UTF-8"?>
 <feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
@@ -38,7 +39,7 @@ const xml = `<?xml version="1.0" encoding="UTF-8"?>
 </feed>
 `;
 
-Deno.test("DOMParser parsers XML", () => {
+Deno.test("DOMParser/XML", () => {
   const parser = new DOMParser();
   const doc = parser.parseFromString(xml, "text/xml");
   const entries = doc.getElementsByTagName("entry");
@@ -60,3 +61,24 @@ Deno.test("DOMParser parsers XML", () => {
     "Get textContent",
   );
 });
+
+Deno.test("parseRSSFeed()", () => {
+  // This id is handed in as already known, so its entry should be left out.
+  const known = new Set(["tag:www.githubstatus.com,2005:Incident/26833707"]);
+  const entries = parseRSSFeed(xml, 5, known);
+  assert(
+    entries.length === 2,
+    "Expecting 2 entries after filtering one existing",
+  );
+  // `satisfies` (unlike an `as FeedItem` cast) type-checks every listed field
+  // while still allowing `description` to be left out of the partial match.
+  assertObjectMatch(entries[0], {
+    author: "",
+    id: "tag:www.githubstatus.com,2005:Incident/26837586",
+    pubDate: "2025-10-21T17:39:34Z",
+    title: "Disruption with some GitHub services",
+    link: "https://www.githubstatus.com/incidents/v61nk2fpysnq",
+    content:
+      "&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:39&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Resolved&lt;/strong&gt; - This incident has been resolved. Thank you for your patience and understanding as we addressed this issue. A detailed root cause analysis will be shared as soon as it is available.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:18&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - Mitigation continues, the impact is limited to Enterprise Cloud customers who have configured SAML at the organization level.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:11&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We continuing to work on mitigation of this issue.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:33&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We’ve identified the issue affecting some users with SAML/OIDC authentication and are actively working on mitigation. Some users may not be able to authenticate during this time.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:03&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We're seeing issues for a small amount of customers with SAML/OIDC authentication for GitHub.com users. We are investigating.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:00&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Investigating&lt;/strong&gt; - We are currently investigating this issue.&lt;/p&gt;",
+  } satisfies Partial<FeedItem>);
+});
diff --git a/recipes/rss-utils.ts b/recipes/rss-utils.ts
@@ -0,0 +1,128 @@
+import { DOMParser, type Element } from "./dom-parser.ts";
+
+/** One feed entry, normalized from an Atom `<entry>` or an RSS `<item>`. */
+export type FeedItem = {
+  /** Atom `<id>` / RSS `<guid>`, or a fallback built from other fields. */
+  id: string;
+  title: string;
+  link: string;
+  /** Atom `<summary>` / RSS `<description>`. */
+  description: string;
+  /** Trimmed date text as it appears in the feed; it is not parsed. */
+  pubDate: string;
+  author: string;
+  /** Atom `<content>` / RSS `<content:encoded>`, else same as `description`. */
+  content: string;
+};
+
+/** Returns the trimmed text of the first `tagName` descendant, or "". */
+function getTextContent(element: Element | null, tagName: string): string {
+  const el = element?.getElementsByTagName(tagName)[0];
+  return el?.textContent?.trim() || "";
+}
+
+/**
+ * Returns the href of an Atom entry's primary link.
+ *
+ * An entry may carry several `<link>` elements (`self`, `enclosure`, ...), so
+ * the first one is not necessarily the article. A link whose `rel` is
+ * "alternate" or absent is preferred; otherwise the first link that has an
+ * href is used.
+ */
+function getAtomLink(entry: Element): string {
+  const links = entry.getElementsByTagName("link");
+  let firstHref = "";
+  for (let i = 0; i < links.length; i++) {
+    const href = links[i].getAttribute("href") || "";
+    if (!href) continue;
+    // Per RFC 4287 a missing `rel` means "alternate".
+    const rel = links[i].getAttribute("rel") || "alternate";
+    if (rel === "alternate") return href;
+    if (!firstHref) firstHref = href;
+  }
+  return firstHref;
+}
+
+/**
+ * Builds an id for an entry that has none of its own.
+ *
+ * The id is what callers use to de-duplicate across fetches, so it is derived
+ * from the entry's own fields where possible: a purely random id would make
+ * the same entry look new on every fetch. A random id remains the last
+ * resort when the entry has no link, title, or date at all.
+ */
+function fallbackId(link: string, title: string, pubDate: string): string {
+  if (link) return link;
+  if (title || pubDate) return `${title}|${pubDate}`;
+  return Math.random().toString(36);
+}
+
+/** Converts one Atom `<entry>` element into a `FeedItem`. */
+function parseAtomEntry(entry: Element): FeedItem {
+  const title = getTextContent(entry, "title");
+  const link = getAtomLink(entry);
+  const description = getTextContent(entry, "summary");
+  // Atom uses <published> or <updated> for the date.
+  const pubDate = getTextContent(entry, "published") ||
+    getTextContent(entry, "updated");
+  // The author name is nested: <author><name>Author</name></author>.
+  const authorEl = entry.getElementsByTagName("author")[0];
+
+  return {
+    id: getTextContent(entry, "id") || fallbackId(link, title, pubDate),
+    title,
+    link,
+    description,
+    pubDate,
+    author: authorEl ? getTextContent(authorEl, "name") : "",
+    content: getTextContent(entry, "content") || description,
+  };
+}
+
+/** Converts one RSS `<item>` element into a `FeedItem`. */
+function parseRSSItem(item: Element): FeedItem {
+  const title = getTextContent(item, "title");
+  const link = getTextContent(item, "link");
+  const description = getTextContent(item, "description");
+  const pubDate = getTextContent(item, "pubDate");
+
+  return {
+    id: getTextContent(item, "guid") || fallbackId(link, title, pubDate),
+    title,
+    link,
+    description,
+    pubDate,
+    author: getTextContent(item, "author"),
+    content: getTextContent(item, "content:encoded") || description,
+  };
+}
+
+/**
+ * Parses an Atom or RSS document into feed items, leaving out known ones.
+ *
+ * @param textXML - Raw XML text of the feed.
+ * @param maxResults - How many entries, counted from the top of the feed, are
+ *   examined. Entries skipped as already known still count toward this limit.
+ * @param existingIds - Ids that are already stored; matching entries are
+ *   omitted from the result. The set is only read, never modified.
+ * @returns The new entries, in the order they were scanned.
+ */
+export function parseRSSFeed(
+  textXML: string,
+  maxResults: number = 100,
+  existingIds: ReadonlySet<string> = new Set(),
+): FeedItem[] {
+  const doc = new DOMParser().parseFromString(textXML, "text/xml");
+
+  // A <feed> element anywhere in the document marks it as Atom.
+  const isAtom = doc.getElementsByTagName("feed").length !== 0;
+  const nodes = doc.getElementsByTagName(isAtom ? "entry" : "item");
+  const scanCount = Math.min(nodes.length, maxResults);
+
+  const retrievedItems: FeedItem[] = [];
+  for (let i = 0; i < scanCount; i++) {
+    const parsed = isAtom ? parseAtomEntry(nodes[i]) : parseRSSItem(nodes[i]);
+    if (!existingIds.has(parsed.id)) retrievedItems.push(parsed);
+  }
+  return retrievedItems;
+}
diff --git a/recipes/rss.tsx b/recipes/rss.tsx
@@ -1,152 +1,41 @@
 /// <cts-enable />
 import {
+  Cell,
   cell,
   Default,
   derive,
   fetchData,
-  lift,
+  handler,
   NAME,
   recipe,
   str,
   UI,
 } from "commontools";
-import { DOMParser, type Element } from "./dom-parser.ts";
+import { type FeedItem, parseRSSFeed } from "./rss-utils.ts";
 
 interface Settings {
   feedUrl: Default<string, "">;
   limit: Default<number, 100>;
 }
 
-type FeedItem = {
-  id: string;
-  title: string;
-  link: string;
-  description: string;
-  pubDate: string;
-  author: string;
-  content: string;
-};
-
-function parseRSSFeed(
-  textXML: string,
-  maxResults: number = 100,
-  existingIds: Set<string>,
-): FeedItem[] {
-  const parser = new DOMParser();
-  const doc = parser.parseFromString(textXML, "text/xml");
-  // Helper function to get text content from an element
-  const getTextContent = (element: Element | null, tagName: string) => {
-    const el = element?.getElementsByTagName(tagName)[0];
-    return el?.textContent?.trim() || "";
-  };
-
-  // Helper function to get attribute value
-  const getAttributeValue = (
-    element: Element | null,
-    tagName: string,
-    attrName: string,
-  ) => {
-    const el = element?.getElementsByTagName(tagName)[0];
-    return el?.getAttribute(attrName) || "";
-  };
-
-  const retrievedItems: FeedItem[] = [];
-
-  // Check if it's an Atom feed
-  const isAtom = doc.getElementsByTagName("feed").length !== 0;
-
-  if (isAtom) {
-    // Parse Atom feed
-    const entries = doc.getElementsByTagName("entry");
-
-    for (let i = 0; i < Math.min(entries.length, maxResults); i++) {
-      const entry = entries[i];
-
-      // In Atom, id is mandatory
-      const id = getTextContent(entry, "id") || Math.random().toString(36);
-
-      // Skip if we already have this item
-      if (existingIds.has(id)) {
-        continue;
-      }
-
-      // Parse link - in Atom links are elements with href attributes
-      const link = getAttributeValue(entry, "link", "href");
-
-      // For content, check content tag first, then summary
-      const content = getTextContent(entry, "content") ||
-        getTextContent(entry, "summary");
-
-      // For author, it might be nested as <author><name>Author</name></author>
-      let author = "";
-      const authorEl = entry.getElementsByTagName("author")[0];
-      if (authorEl) {
-        author = getTextContent(authorEl, "name");
-      }
-
-      // For pubDate, Atom uses <published> or <updated>
-      const pubDate = getTextContent(entry, "published") ||
-        getTextContent(entry, "updated");
-
-      retrievedItems.push({
-        id,
-        title: getTextContent(entry, "title"),
-        link,
-        description: getTextContent(entry, "summary"),
-        pubDate,
-        author,
-        content,
-      });
-    }
-  } else {
-    // Parse RSS feed
-    const rssItems = doc.getElementsByTagName("item");
-
-    for (let i = 0; i < Math.min(rssItems.length, maxResults); i++) {
-      const item = rssItems[i];
-
-      const id = getTextContent(item, "guid") ||
-        getTextContent(item, "link") ||
-        Math.random().toString(36);
-
-      if (existingIds.has(id)) {
-        continue;
-      }
-
-      retrievedItems.push({
-        id,
-        title: getTextContent(item, "title"),
-        link: getTextContent(item, "link"),
-        description: getTextContent(item, "description"),
-        pubDate: getTextContent(item, "pubDate"),
-        author: getTextContent(item, "author"),
-        content: getTextContent(item, "content:encoded") ||
-          getTextContent(item, "description"),
-      });
-    }
-  }
-
-  return retrievedItems;
-}
-
-const feedUpdater = lift<{
-  items: FeedItem[];
+const feedUpdater = handler<never, {
+  items: Cell<FeedItem[]>;
   settings: Settings;
-}>(({ settings, items }) => {
+}>((_, { items, settings }) => {
   if (!settings.feedUrl) {
     console.warn("no feed URL provided");
     return;
   }
 
   const query = fetchData({ url: settings.feedUrl, mode: "text" });
   return derive(
-    { items, result: query.result, limit: settings.limit },
-    ({ result, limit, items }) => {
-      if (!result || typeof result !== "string") return;
+    { items, query, limit: settings.limit },
+    ({ query, limit, items }) => {
+      if (!query.result || typeof query.result !== "string") return;
       const newEntries = parseRSSFeed(
-        result as string,
+        query.result as string,
         limit,
-        new Set(items.map((item) => item.id)),
+        new Set(items.get().map((item) => item.id)),
       );
       items.push(...newEntries);
     },