Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions recipes/dom-parser.test.ts → recipes/rss-utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { DOMParser } from "./dom-parser.ts";
import { assert } from "@std/assert";
import { assert, assertObjectMatch } from "@std/assert";
import { FeedItem, parseRSSFeed } from "./rss-utils.ts";

const xml = `<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
Expand Down Expand Up @@ -38,7 +39,7 @@ const xml = `<?xml version="1.0" encoding="UTF-8"?>
</feed>
`;

Deno.test("DOMParser parsers XML", () => {
Deno.test("DOMParser/XML", () => {
const parser = new DOMParser();
const doc = parser.parseFromString(xml, "text/xml");
const entries = doc.getElementsByTagName("entry");
Expand All @@ -60,3 +61,24 @@ Deno.test("DOMParser parsers XML", () => {
"Get textContent",
);
});

// Parses the Atom fixture while treating one entry id as already stored, then
// checks the number of remaining entries and the fields of the first one.
Deno.test("parseRSSFeed()", () => {
  const alreadyStored = new Set([
    "tag:www.githubstatus.com,2005:Incident/26833707",
  ]);
  const parsed = parseRSSFeed(xml, 5, alreadyStored);

  assert(
    parsed.length === 2,
    "Expecting 2 entries after filtering one existing",
  );

  // Only the listed keys are compared; `description` is intentionally left out.
  const expectedFirst = {
    author: "",
    id: "tag:www.githubstatus.com,2005:Incident/26837586",
    pubDate: "2025-10-21T17:39:34Z",
    title: "Disruption with some GitHub services",
    link: "https://www.githubstatus.com/incidents/v61nk2fpysnq",
    content:
      "&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:39&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Resolved&lt;/strong&gt; - This incident has been resolved. Thank you for your patience and understanding as we addressed this issue. A detailed root cause analysis will be shared as soon as it is available.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:18&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - Mitigation continues, the impact is limited to Enterprise Cloud customers who have configured SAML at the organization level.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:11&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We continuing to work on mitigation of this issue.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:33&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We’ve identified the issue affecting some users with SAML/OIDC authentication and are actively working on mitigation. Some users may not be able to authenticate during this time.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:03&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We're seeing issues for a small amount of customers with SAML/OIDC authentication for GitHub.com users. We are investigating.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:00&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Investigating&lt;/strong&gt; - We are currently investigating this issue.&lt;/p&gt;",
  } as FeedItem;
  assertObjectMatch(parsed[0], expectedFirst);
});
113 changes: 113 additions & 0 deletions recipes/rss-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { DOMParser, type Element } from "./dom-parser.ts";

/**
 * One feed entry in the common shape produced by `parseRSSFeed` for both Atom
 * and RSS documents. Every field is a string; a value the feed does not
 * provide is represented as "".
 */
export type FeedItem = {
// Identifier used to skip entries that were already retrieved.
id: string;
// Text of the entry's <title> element.
title: string;
// Entry URL: the `href` attribute of an Atom <link>, or the text of an RSS <link>.
link: string;
// Short text: Atom <summary> or RSS <description>.
description: string;
// Date text copied from the feed (Atom <published>/<updated>, RSS <pubDate>); it is not parsed.
pubDate: string;
// Author name, when the feed provides one.
author: string;
// Body text: Atom <content> or RSS <content:encoded>, falling back to the description text.
content: string;
};

/**
 * Returns the trimmed text of the first `tagName` descendant of `element`,
 * or "" when the element, the descendant, or its text is missing.
 */
function firstTagText(element: Element | null, tagName: string): string {
  const el = element?.getElementsByTagName(tagName)[0];
  return el?.textContent?.trim() || "";
}

/**
 * Returns the URL of an Atom entry's primary link.
 *
 * An entry may carry several <link> elements (`self`, `enclosure`, `replies`,
 * ...). The one pointing at the entry itself is `rel="alternate"`, which is
 * also what a missing `rel` means, so that one is preferred. When no such
 * link exists, the first link with a non-empty `href` is used; "" otherwise.
 */
function atomEntryLink(entry: Element): string {
  const links = entry.getElementsByTagName("link");
  let firstHref = "";
  for (let i = 0; i < links.length; i++) {
    const linkEl = links[i];
    if (!linkEl) continue;
    const href = linkEl.getAttribute("href") || "";
    if (!href) continue;
    const rel = linkEl.getAttribute("rel");
    if (!rel || rel === "alternate") return href;
    if (!firstHref) firstHref = href;
  }
  return firstHref;
}

/** Builds the item fields for one Atom <entry>. */
function atomEntryFields(entry: Element, id: string, link: string) {
  const summary = firstTagText(entry, "summary");
  // The author name is nested: <author><name>Author</name></author>.
  const authorEl = entry.getElementsByTagName("author")[0];
  return {
    id,
    title: firstTagText(entry, "title"),
    link,
    description: summary,
    // Atom uses <published>, with <updated> as the fallback.
    pubDate: firstTagText(entry, "published") ||
      firstTagText(entry, "updated"),
    author: authorEl ? firstTagText(authorEl, "name") : "",
    // Prefer the full <content>; fall back to the summary.
    content: firstTagText(entry, "content") || summary,
  };
}

/** Builds the item fields for one RSS <item>. */
function rssItemFields(item: Element, id: string, link: string) {
  const description = firstTagText(item, "description");
  return {
    id,
    title: firstTagText(item, "title"),
    link,
    description,
    pubDate: firstTagText(item, "pubDate"),
    // Many RSS feeds name the author with Dublin Core's <dc:creator>
    // instead of <author>.
    author: firstTagText(item, "author") ||
      firstTagText(item, "dc:creator"),
    content: firstTagText(item, "content:encoded") || description,
  };
}

/**
 * Parses an Atom or RSS document and returns the entries that are not yet
 * known.
 *
 * The document is treated as Atom when it contains a <feed> element and as
 * RSS otherwise.
 *
 * @param textXML Raw XML text of the feed.
 * @param maxResults Maximum number of leading feed entries to examine. The
 *   limit is applied before filtering, so fewer items may be returned.
 * @param existingIds Ids of items that are already stored; matching entries
 *   are skipped. Defaults to an empty set.
 * @returns The new items in feed order, each id appearing at most once.
 */
export function parseRSSFeed(
  textXML: string,
  maxResults: number = 100,
  existingIds: ReadonlySet<string> = new Set<string>(),
): FeedItem[] {
  const doc = new DOMParser().parseFromString(textXML, "text/xml");

  const isAtom = doc.getElementsByTagName("feed").length !== 0;
  const nodes = doc.getElementsByTagName(isAtom ? "entry" : "item");
  const scanCount = Math.min(nodes.length, maxResults);

  // Ids emitted during this call, so a feed that repeats an id yields it once.
  const seenIds = new Set<string>();
  const retrievedItems: FeedItem[] = [];

  for (let i = 0; i < scanCount; i++) {
    const node = nodes[i];
    if (!node) continue;

    const link = isAtom ? atomEntryLink(node) : firstTagText(node, "link");

    // Prefer the feed's own identifier, then the link. Both are stable across
    // fetches, so de-duplication keeps working. A random id is the last resort.
    const id = firstTagText(node, isAtom ? "id" : "guid") ||
      link ||
      Math.random().toString(36);

    if (existingIds.has(id) || seenIds.has(id)) {
      continue;
    }
    seenIds.add(id);

    retrievedItems.push(
      isAtom
        ? atomEntryFields(node, id, link)
        : rssItemFields(node, id, link),
    );
  }

  return retrievedItems;
}
133 changes: 11 additions & 122 deletions recipes/rss.tsx
Original file line number Diff line number Diff line change
@@ -1,152 +1,41 @@
/// <cts-enable />
import {
Cell,
cell,
Default,
derive,
fetchData,
lift,
handler,
NAME,
recipe,
str,
UI,
} from "commontools";
import { DOMParser, type Element } from "./dom-parser.ts";
import { type FeedItem, parseRSSFeed } from "./rss-utils.ts";

/**
 * Inputs that configure the RSS recipe's feed updater.
 * NOTE(review): presumably `Default<T, V>` makes `V` the value used when the
 * field is unset — confirm against the commontools documentation.
 */
interface Settings {
// URL of the feed to fetch; when it is empty the updater logs a warning and returns.
feedUrl: Default<string, "">;
// Passed to parseRSSFeed as its `maxResults` argument.
limit: Default<number, 100>;
}

/**
 * One feed entry in the common shape produced by `parseRSSFeed` for both Atom
 * and RSS documents. Every field is a string; a value the feed does not
 * provide is represented as "".
 */
type FeedItem = {
// Identifier used to skip entries that were already retrieved.
id: string;
// Text of the entry's <title> element.
title: string;
// Entry URL: the `href` attribute of an Atom <link>, or the text of an RSS <link>.
link: string;
// Short text: Atom <summary> or RSS <description>.
description: string;
// Date text copied from the feed (Atom <published>/<updated>, RSS <pubDate>); it is not parsed.
pubDate: string;
// Author name, when the feed provides one.
author: string;
// Body text: Atom <content> or RSS <content:encoded>, falling back to the description text.
content: string;
};

/**
 * Turns the XML text of an Atom or RSS feed into a list of feed items.
 *
 * A document containing a <feed> element is read as Atom; anything else is
 * read as RSS. Only the first `maxResults` entries of the document are
 * examined, and entries whose id is in `existingIds` are left out.
 *
 * @param textXML Raw XML text of the feed.
 * @param maxResults Upper bound on the number of entries examined.
 * @param existingIds Ids of items the caller already has.
 * @returns The entries that were not skipped, in document order.
 */
function parseRSSFeed(
  textXML: string,
  maxResults: number = 100,
  existingIds: Set<string>,
): FeedItem[] {
  const doc = new DOMParser().parseFromString(textXML, "text/xml");

  // Trimmed text of the first `tagName` descendant; "" when nothing usable is found.
  function textOf(element: Element | null, tagName: string) {
    const match = element?.getElementsByTagName(tagName)[0];
    const trimmed = match?.textContent?.trim();
    return trimmed ? trimmed : "";
  }

  // Value of `attrName` on the first `tagName` descendant; "" when missing.
  function attrOf(element: Element | null, tagName: string, attrName: string) {
    const match = element?.getElementsByTagName(tagName)[0];
    const value = match?.getAttribute(attrName);
    return value ? value : "";
  }

  const collected: FeedItem[] = [];

  // No <feed> element: handle the document as RSS and finish early.
  if (doc.getElementsByTagName("feed").length === 0) {
    const rssNodes = doc.getElementsByTagName("item");
    const rssCount = Math.min(rssNodes.length, maxResults);

    for (let index = 0; index < rssCount; index += 1) {
      const node = rssNodes[index];

      // guid first, then link, then a random id as the last resort.
      let id = textOf(node, "guid");
      if (!id) id = textOf(node, "link");
      if (!id) id = Math.random().toString(36);

      if (existingIds.has(id)) continue;

      const summary = textOf(node, "description");
      const encoded = textOf(node, "content:encoded");

      collected.push({
        id,
        title: textOf(node, "title"),
        link: textOf(node, "link"),
        description: summary,
        pubDate: textOf(node, "pubDate"),
        author: textOf(node, "author"),
        content: encoded ? encoded : summary,
      });
    }

    return collected;
  }

  // Atom document.
  const atomEntries = doc.getElementsByTagName("entry");
  const atomCount = Math.min(atomEntries.length, maxResults);

  for (let index = 0; index < atomCount; index += 1) {
    const entry = atomEntries[index];

    // <id> is mandatory in Atom; a random id covers feeds that omit it.
    let id = textOf(entry, "id");
    if (!id) id = Math.random().toString(36);

    if (existingIds.has(id)) continue;

    const summary = textOf(entry, "summary");
    const body = textOf(entry, "content");
    const published = textOf(entry, "published");
    // The author name is nested: <author><name>Author</name></author>.
    const firstAuthor = entry.getElementsByTagName("author")[0];

    collected.push({
      id,
      title: textOf(entry, "title"),
      // Atom links are elements that carry the URL in their href attribute.
      link: attrOf(entry, "link", "href"),
      description: summary,
      pubDate: published ? published : textOf(entry, "updated"),
      author: firstAuthor ? textOf(firstAuthor, "name") : "",
      content: body ? body : summary,
    });
  }

  return collected;
}

const feedUpdater = lift<{
items: FeedItem[];
const feedUpdater = handler<never, {
items: Cell<FeedItem[]>;
settings: Settings;
}>(({ settings, items }) => {
}>((_, { items, settings }) => {
if (!settings.feedUrl) {
console.warn("no feed URL provided");
return;
}

const query = fetchData({ url: settings.feedUrl, mode: "text" });
return derive(
{ items, result: query.result, limit: settings.limit },
({ result, limit, items }) => {
if (!result || typeof result !== "string") return;
{ items, query, limit: settings.limit },
({ query, limit, items }) => {
if (!query.result || typeof query.result !== "string") return;
const newEntries = parseRSSFeed(
result as string,
query.result as string,
limit,
new Set(items.map((item) => item.id)),
new Set(items.get().map((item) => item.id)),
);
items.push(...newEntries);
},
Expand Down